Search in sources :

Example 6 with WorkerPorts

use of io.mantisrx.common.WorkerPorts in project mantis by Netflix.

the class LocalJobExecutorNetworked method execute.

/**
 * Runs a Mantis job inside the current JVM, connecting its stages through local ports.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>validate the job via {@code ValidateJob}; a {@code CommandException} is rethrown as
 *       {@code IllegalMantisJobException};</li>
 *   <li>start a {@code MetricsServer} and the job's {@code Lifecycle};</li>
 *   <li>for a one-stage job, launch one worker per configured instance through
 *       {@code StageExecutors.executeSingleStageJob}; otherwise start the source stage, every
 *       intermediate stage and finally the sink stage;</li>
 *   <li>block on a {@code CountDownLatch} sized to the instance count of the single (or last)
 *       stage;</li>
 *   <li>shut down the lifecycle and the metrics server.</li>
 * </ol>
 *
 * <p>All ports come from a {@code PortSelectorInRange} constructed with the arguments
 * 8000 and 9000.
 *
 * @param job the job to run
 * @param schedulingInfo per-stage scheduling information; stages are looked up starting at 1
 * @param parameters job parameters handed to {@code ParameterUtils.createContextParameters}
 * @throws IllegalMantisJobException if job validation fails
 * @throws RuntimeException if the calling thread is interrupted while waiting for the workers;
 *         the thread's interrupt flag is restored before the exception is thrown
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void execute(Job job, SchedulingInfo schedulingInfo, Parameter... parameters) throws IllegalMantisJobException {
    // validate job
    try {
        new ValidateJob(job).execute();
    } catch (CommandException e) {
        throw new IllegalMantisJobException(e);
    }
    // execute job
    List<StageConfig> stages = job.getStages();
    final SourceHolder source = job.getSource();
    final SinkHolder sink = job.getSink();
    final PortSelector portSelector = new PortSelectorInRange(8000, 9000);
    // register netty metrics
    RxNetty.useMetricListenersFactory(new MantisNettyEventsListenerFactory());
    // start our metrics server
    MetricsServer metricsServer = new MetricsServer(portSelector.acquirePort(), 1, Collections.EMPTY_MAP);
    metricsServer.start();
    Lifecycle lifecycle = job.getLifecycle();
    lifecycle.startup();
    // create job context
    Map parameterDefinitions = job.getParameterDefinitions();
    final String user = Optional.ofNullable(System.getenv("USER")).orElse("userUnknown");
    String jobId = String.format("localJob-%s-%d", user, (int) (Math.random() * 10000));
    logger.info("jobID {}", jobId);
    final ServiceLocator serviceLocator = lifecycle.getServiceLocator();
    int numInstances = schedulingInfo.forStage(1).getNumberOfInstances();
    BehaviorSubject<Integer> workersInStageOneObservable = BehaviorSubject.create(numInstances);
    BehaviorSubject<WorkerMap> workerMapObservable = BehaviorSubject.create();
    if (stages.size() == 1) {
        // single stage job
        final StageConfig stage = stages.get(0);
        // use latch to wait for all instances to complete
        final CountDownLatch waitUntilAllCompleted = new CountDownLatch(numInstances);
        Action0 countDownLatchOnComplete = waitUntilAllCompleted::countDown;
        // deliberately empty callbacks
        Action0 nullOnCompleted = () -> {
        };
        Action1<Throwable> nullOnError = t -> {
        };
        Map<Integer, List<WorkerInfo>> workerInfoMap = new HashMap<>();
        List<WorkerInfo> workerInfoList = new ArrayList<>();
        // run for num of instances
        for (int i = 0; i < numInstances; i++) {
            WorkerPorts workerPorts = new WorkerPorts(portSelector.acquirePort(), portSelector.acquirePort(), portSelector.acquirePort(), portSelector.acquirePort(), portSelector.acquirePort());
            WorkerInfo workerInfo = new WorkerInfo(jobId, jobId, 1, i, i + 1, MantisJobDurationType.Perpetual, "localhost", workerPorts);
            workerInfoList.add(workerInfo);
            // the action handed to this context terminates the JVM when invoked
            Context context = new Context(
                    ParameterUtils.createContextParameters(parameterDefinitions, parameters),
                    lifecycle.getServiceLocator(),
                    workerInfo,
                    MetricsRegistry.getInstance(),
                    () -> {
                        System.exit(0);
                    },
                    workerMapObservable);
            // workers for stage 1; a fresh WorkerMap is published after every added worker
            workerInfoMap.put(1, workerInfoList);
            workerMapObservable.onNext(new WorkerMap(workerInfoMap));
            StageExecutors.executeSingleStageJob(source, stage, sink, () -> workerInfo.getWorkerPorts().getSinkPort(), new RxMetrics(), context, countDownLatchOnComplete, i, workersInStageOneObservable, null, null, nullOnCompleted, nullOnError);
        }
        // wait for all instances to complete
        try {
            waitUntilAllCompleted.await();
        } catch (InterruptedException e) {
            // restore the interrupt flag so code further up the stack can still observe it
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    } else {
        // multi-stage job
        // NOTE(review): stage-1 workers below pass i + 1 as the fifth WorkerInfo argument, while
        // later stages pass this counter starting at 0 — presumably the worker number, so the
        // values can overlap across stages. Confirm whether that is intended.
        int workerNumber = 0;
        // start source stages
        StageConfig currentStage = stages.get(0);
        StageConfig previousStage = null;
        StageSchedulingInfo currentStageScalingInfo = schedulingInfo.forStage(1);
        StageSchedulingInfo nextStageScalingInfo = schedulingInfo.forStage(2);
        // num ports
        int[] previousPorts = new int[currentStageScalingInfo.getNumberOfInstances()];
        Map<Integer, List<WorkerInfo>> workerInfoMap = new HashMap<>();
        List<WorkerInfo> workerInfoList = new ArrayList<>();
        for (int i = 0; i < currentStageScalingInfo.getNumberOfInstances(); i++) {
            WorkerPorts workerPorts = new WorkerPorts(portSelector.acquirePort(), portSelector.acquirePort(), portSelector.acquirePort(), portSelector.acquirePort(), portSelector.acquirePort());
            WorkerInfo workerInfo = new WorkerInfo(jobId, jobId, 1, i, i + 1, MantisJobDurationType.Perpetual, "localhost", workerPorts);
            workerInfoList.add(workerInfo);
            // the worker's sink port is the port the next stage connects to
            int sourcePort = workerInfo.getWorkerPorts().getSinkPort();
            previousPorts[i] = sourcePort;
            Context context = new Context(ParameterUtils.createContextParameters(parameterDefinitions, parameters), serviceLocator, workerInfo, MetricsRegistry.getInstance(), nullAction, workerMapObservable);
            startSource(i, sourcePort, nextStageScalingInfo.getNumberOfInstances(), job.getSource(), currentStage, context, workersInStageOneObservable);
        }
        // workers for stage 1
        workerInfoMap.put(1, workerInfoList);
        workerMapObservable.onNext(new WorkerMap(workerInfoMap));
        // start intermediate stages, all but last stage
        for (int i = 1; i < stages.size() - 1; i++) {
            previousStage = currentStage;
            StageSchedulingInfo previousStageScalingInfo = schedulingInfo.forStage(i);
            // stages indexed starting at 1
            currentStageScalingInfo = schedulingInfo.forStage(i + 1);
            currentStage = stages.get(i);
            // stages indexed starting at 1
            nextStageScalingInfo = schedulingInfo.forStage(i + 2);
            int[] currentPorts = new int[currentStageScalingInfo.getNumberOfInstances()];
            workerInfoList = new ArrayList<>();
            for (int j = 0; j < currentStageScalingInfo.getNumberOfInstances(); j++) {
                WorkerPorts workerPorts = new WorkerPorts(portSelector.acquirePort(), portSelector.acquirePort(), portSelector.acquirePort(), portSelector.acquirePort(), portSelector.acquirePort());
                WorkerInfo workerInfo = new WorkerInfo(jobId, jobId, i + 1, j, workerNumber++, MantisJobDurationType.Perpetual, "localhost", workerPorts);
                workerInfoList.add(workerInfo);
                int port = workerInfo.getWorkerPorts().getSinkPort();
                currentPorts[j] = port;
                Context context = new Context(ParameterUtils.createContextParameters(parameterDefinitions, parameters), serviceLocator, workerInfo, MetricsRegistry.getInstance(), nullAction, workerMapObservable);
                startIntermediate(previousPorts, port, currentStage, context, j, nextStageScalingInfo.getNumberOfInstances(), i, previousStageScalingInfo.getNumberOfInstances());
            }
            // workers for current stage
            workerInfoMap.put(i + 1, workerInfoList);
            workerMapObservable.onNext(new WorkerMap(workerInfoMap));
            previousPorts = currentPorts;
        }
        // start sink stage
        StageSchedulingInfo previousStageScalingInfo = schedulingInfo.forStage(stages.size() - 1);
        previousStage = stages.get(stages.size() - 2);
        currentStage = stages.get(stages.size() - 1);
        currentStageScalingInfo = schedulingInfo.forStage(stages.size());
        numInstances = currentStageScalingInfo.getNumberOfInstances();
        // use latch to wait for all instances to complete
        final CountDownLatch waitUntilAllCompleted = new CountDownLatch(numInstances);
        Action0 countDownLatchOnTerminated = waitUntilAllCompleted::countDown;
        // deliberately empty callbacks
        Action0 nullOnCompleted = () -> {
        };
        Action1<Throwable> nullOnError = t -> {
        };
        workerInfoList = new ArrayList<>();
        for (int i = 0; i < numInstances; i++) {
            WorkerPorts workerPorts = new WorkerPorts(portSelector.acquirePort(), portSelector.acquirePort(), portSelector.acquirePort(), portSelector.acquirePort(), portSelector.acquirePort());
            WorkerInfo workerInfo = new WorkerInfo(jobId, jobId, stages.size(), i, workerNumber++, MantisJobDurationType.Perpetual, "localhost", workerPorts);
            workerInfoList.add(workerInfo);
            Context context = new Context(ParameterUtils.createContextParameters(parameterDefinitions, parameters), serviceLocator, workerInfo, MetricsRegistry.getInstance(), nullAction, workerMapObservable);
            startSink(previousStage, previousPorts, currentStage, () -> workerInfo.getWorkerPorts().getSinkPort(), sink, context, countDownLatchOnTerminated, nullOnCompleted, nullOnError, stages.size(), i, previousStageScalingInfo.getNumberOfInstances());
        }
        workerInfoMap.put(stages.size(), workerInfoList);
        workerMapObservable.onNext(new WorkerMap(workerInfoMap));
        // wait for all instances to complete
        try {
            waitUntilAllCompleted.await();
        } catch (InterruptedException e) {
            // restore the interrupt flag so code further up the stack can still observe it
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }
    lifecycle.shutdown();
    metricsServer.shutdown();
}
Also used : HashMap(java.util.HashMap) MetricsServer(io.mantisrx.common.metrics.MetricsServer) ArrayList(java.util.ArrayList) WorkerInfo(io.mantisrx.runtime.WorkerInfo) ValidateJob(io.mantisrx.runtime.command.ValidateJob) MantisNettyEventsListenerFactory(io.mantisrx.common.metrics.netty.MantisNettyEventsListenerFactory) RxMetrics(io.reactivex.mantis.remote.observable.RxMetrics) ArrayList(java.util.ArrayList) List(java.util.List) WorkerMap(io.mantisrx.runtime.WorkerMap) Context(io.mantisrx.runtime.Context) Action0(rx.functions.Action0) Action1(rx.functions.Action1) SourceHolder(io.mantisrx.runtime.SourceHolder) Lifecycle(io.mantisrx.runtime.lifecycle.Lifecycle) CommandException(io.mantisrx.runtime.command.CommandException) CountDownLatch(java.util.concurrent.CountDownLatch) StageConfig(io.mantisrx.runtime.StageConfig) Endpoint(io.mantisrx.common.network.Endpoint) ServiceLocator(io.mantisrx.runtime.lifecycle.ServiceLocator) WorkerPorts(io.mantisrx.common.WorkerPorts) SinkHolder(io.mantisrx.runtime.SinkHolder) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) HashMap(java.util.HashMap) Map(java.util.Map) WorkerMap(io.mantisrx.runtime.WorkerMap)

Example 7 with WorkerPorts

use of io.mantisrx.common.WorkerPorts in project mantis by Netflix.

the class DataFormatAdapterTest method convertMantisJobWriteableTest.

/**
 * Converts a job's metadata to the writeable store format and back again, checking the
 * individual fields after each conversion.
 */
@Test
public void convertMantisJobWriteableTest() throws Exception {
    // ---- fixture values ----
    final String artifactName = "artifact";
    final String version = "1.0.0";
    final String clusterName = "myCluster";
    final long subscriptionTimeout = 1000;

    final Label expectedLabel = new Label("myLable", "myVal");
    final List<Label> labelList = new ArrayList<>();
    labelList.add(expectedLabel);

    final Parameter expectedParam = new Parameter("myparam", "myval");
    final List<Parameter> paramList = new ArrayList<>();
    paramList.add(expectedParam);

    final JobSla expectedSla = new JobSla(100, 10, JobSla.StreamSLAType.Lossy, MantisJobDurationType.Perpetual, "userType");
    final JobDefinition jobDefinition = new JobDefinition.Builder()
            .withArtifactName(artifactName)
            .withName(clusterName)
            .withLabels(labelList)
            .withParameters(paramList)
            .withSchedulingInfo(DEFAULT_SCHED_INFO)
            .withUser("user")
            .withJobSla(expectedSla)
            .withSubscriptionTimeoutSecs(subscriptionTimeout)
            .withNumberOfStages(DEFAULT_SCHED_INFO.getStages().size())
            .build();

    final JobId jobId = new JobId(clusterName, 1);
    final long nowMillis = System.currentTimeMillis();
    final Instant startedAt = Instant.ofEpochMilli(nowMillis);
    final Instant endedAt = startedAt.plusSeconds(5);
    final Instant submittedAt = startedAt.minusSeconds(5);

    // ---- job metadata with one stage and one started worker ----
    final IMantisJobMetadata original = new MantisJobMetadataImpl.Builder()
            .withJobDefinition(jobDefinition)
            .withJobId(jobId)
            .withNextWorkerNumToUse(2)
            .withSubmittedAt(submittedAt)
            .withJobState(JobState.Launched)
            .build();
    final IMantisWorkerMetadata startedWorker = new MantisWorkerMetadataImpl(
            0, 1, jobId.getId(), 1, 3,
            new WorkerPorts(Lists.newArrayList(8000, 9000, 9010, 9020, 9030)),
            WorkerState.Started, "slave", "slaveId",
            startedAt.toEpochMilli(), startedAt.toEpochMilli(), startedAt.toEpochMilli(), startedAt.toEpochMilli(),
            -1, JobCompletedReason.Normal, 0, 0, of("cluster"));
    final MantisJobMetadataImpl originalImpl = (MantisJobMetadataImpl) original;
    originalImpl.addJobStageIfAbsent(new MantisStageMetadataImpl.Builder()
            .withNumStages(1)
            .withStageNum(1)
            .withNumWorkers(1)
            .withJobId(jobId)
            .withHardConstraints(Lists.newArrayList())
            .withSoftConstraints(Lists.newArrayList())
            .withMachineDefinition(DEFAULT_MACHINE_DEFINITION)
            .build());
    originalImpl.addWorkerMetadata(1, new JobWorker(startedWorker, eventPublisher));

    // ---- new format -> writeable format ----
    final MantisJobMetadata writeable = DataFormatAdapter.convertMantisJobMetadataToMantisJobMetadataWriteable(original);
    System.out.println("oldForamt -> " + writeable);
    assertEquals(jobId.getId(), writeable.getJobId());
    assertEquals(expectedLabel, writeable.getLabels().get(0));
    assertEquals(expectedParam, writeable.getParameters().get(0));
    assertEquals(clusterName, writeable.getName());
    assertEquals(expectedSla, writeable.getSla());
    assertEquals(1, writeable.getNumStages());
    assertEquals(subscriptionTimeout, writeable.getSubscriptionTimeoutSecs());
    assertEquals(2, writeable.getNextWorkerNumberToUse());
    assertEquals("http://" + artifactName, writeable.getJarUrl().toString());
    assertEquals(MantisJobState.Launched, writeable.getState());
    assertEquals(submittedAt.toEpochMilli(), writeable.getSubmittedAt());
    assertEquals("user", writeable.getUser());

    // ---- writeable format -> new format; compared field by field ----
    final IMantisJobMetadata roundTripped = DataFormatAdapter.convertMantisJobWriteableToMantisJobMetadata(writeable, eventPublisher);
    System.out.println("newForamt -> " + roundTripped);
    assertEquals(original.getArtifactName(), roundTripped.getArtifactName());
    assertEquals(original.getClusterName(), roundTripped.getClusterName());
    System.out.println("expected Jobdef " + original.getJobDefinition());
    System.out.println("actual   Jobdef " + roundTripped.getJobDefinition());
    assertEquals(original.getJobDefinition(), roundTripped.getJobDefinition());
    assertEquals(original.getJobId(), roundTripped.getJobId());
    assertEquals(original.getJobJarUrl(), roundTripped.getJobJarUrl());
    assertEquals(original.getLabels().get(0), roundTripped.getLabels().get(0));
    assertEquals(original.getParameters().get(0), roundTripped.getParameters().get(0));
    assertEquals(original.getMinRuntimeSecs(), roundTripped.getMinRuntimeSecs());
    assertEquals(original.getNextWorkerNumberToUse(), roundTripped.getNextWorkerNumberToUse());
    assertEquals(original.getSla().get(), roundTripped.getSla().get());
    assertEquals(original.getSubmittedAtInstant(), roundTripped.getSubmittedAtInstant());
    assertEquals(original.getState(), roundTripped.getState());
    assertEquals(original.getSubscriptionTimeoutSecs(), roundTripped.getSubscriptionTimeoutSecs());
    assertEquals(original.getTotalStages(), roundTripped.getTotalStages());
    assertEquals(original.getUser(), roundTripped.getUser());
}
Also used : MantisStageMetadataImpl(io.mantisrx.master.jobcluster.job.MantisStageMetadataImpl) Instant(java.time.Instant) Label(io.mantisrx.common.Label) ArrayList(java.util.ArrayList) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) JobWorker(io.mantisrx.master.jobcluster.job.worker.JobWorker) MantisJobMetadata(io.mantisrx.server.master.store.MantisJobMetadata) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) WorkerPorts(io.mantisrx.common.WorkerPorts) Parameter(io.mantisrx.runtime.parameter.Parameter) IMantisWorkerMetadata(io.mantisrx.master.jobcluster.job.worker.IMantisWorkerMetadata) JobSla(io.mantisrx.runtime.JobSla) MantisJobMetadataImpl(io.mantisrx.master.jobcluster.job.MantisJobMetadataImpl) NamedJobDefinition(io.mantisrx.runtime.NamedJobDefinition) MantisWorkerMetadataImpl(io.mantisrx.master.jobcluster.job.worker.MantisWorkerMetadataImpl) Test(org.junit.Test)

Example 8 with WorkerPorts

use of io.mantisrx.common.WorkerPorts in project mantis by Netflix.

the class NoOpMantisJobOperations method setWorkerMetadataWritable.

/**
 * Copies the fields of {@code workerMeta} onto {@code writable}.
 *
 * <p>Timestamps, cluster/slave identifiers, ports, state, resubmit info and reason are
 * transferred. The state is applied through {@code setStateNoValidation}, using the timestamp
 * that matches the converted state (for example the completed-at timestamp for both
 * {@code Failed} and {@code Completed}).
 *
 * @param writable the target object that is mutated
 * @param workerMeta the source worker metadata
 * @throws RuntimeException if applying the state throws; the original exception is attached as
 *         the cause
 */
public static void setWorkerMetadataWritable(MantisWorkerMetadataWritable writable, IMantisWorkerMetadata workerMeta) {
    writable.setAcceptedAt(workerMeta.getAcceptedAt());
    writable.setLaunchedAt(workerMeta.getLaunchedAt());
    writable.setCompletedAt(workerMeta.getCompletedAt());
    writable.setStartingAt(workerMeta.getStartingAt());
    writable.setStartedAt(workerMeta.getStartedAt());
    writable.setCluster(workerMeta.getCluster());
    writable.setSlave(workerMeta.getSlave());
    writable.setSlaveID(workerMeta.getSlaveID());
    // ports are optional on the source; only copy the list when it is present
    Optional<WorkerPorts> wPorts = workerMeta.getPorts();
    if (wPorts.isPresent()) {
        WorkerPorts wP = wPorts.get();
        writable.addPorts(wP.getPorts());
    }
    writable.setConsolePort(workerMeta.getConsolePort());
    writable.setDebugPort(workerMeta.getDebugPort());
    writable.setMetricsPort(workerMeta.getMetricsPort());
    writable.setCustomPort(workerMeta.getCustomPort());
    MantisJobState state = convertWorkerStateToMantisJobState(workerMeta.getState());
    try {
        switch(state) {
            case Accepted:
                writable.setStateNoValidation(state, workerMeta.getAcceptedAt(), workerMeta.getReason());
                break;
            case Launched:
                writable.setStateNoValidation(state, workerMeta.getLaunchedAt(), workerMeta.getReason());
                break;
            case StartInitiated:
                writable.setStateNoValidation(state, workerMeta.getStartingAt(), workerMeta.getReason());
                break;
            case Started:
                writable.setStateNoValidation(state, workerMeta.getStartedAt(), workerMeta.getReason());
                break;
            case Failed:
            case Completed:
                // both terminal states are stamped with the completion time
                writable.setStateNoValidation(state, workerMeta.getCompletedAt(), workerMeta.getReason());
                break;
            default:
                // only trips when assertions are enabled (-ea); otherwise the state is left unset
                assert false : "Unexpected job state to set";
        }
    } catch (Exception e) {
        // keep the original exception as the cause so its stack trace is not lost
        throw new RuntimeException("Error converting to MantisWorkerWriteable " + e.getMessage(), e);
    }
    writable.setResubmitInfo(workerMeta.getResubmitOf(), workerMeta.getTotalResubmitCount());
    writable.setReason(workerMeta.getReason());
}
Also used : MantisJobState(io.mantisrx.runtime.MantisJobState) WorkerPorts(io.mantisrx.common.WorkerPorts) NamedJobDeleteException(io.mantisrx.server.master.store.NamedJobDeleteException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) InvalidNamedJobException(io.mantisrx.server.master.store.InvalidNamedJobException)

Example 9 with WorkerPorts

use of io.mantisrx.common.WorkerPorts in project mantis by Netflix.

the class JobClusterManagerTest method testBootstrapJobClusterAndJobsWithCorruptedWorkerPorts.

/**
 * Case for a master leader re-election when a new master re-hydrates corrupted job worker metadata.
 *
 * <p>The test launches a worker, then overwrites its stored metadata with a copy built with
 * {@code withWorkerPorts(null)}, restarts the job clusters manager actor against the same store,
 * and checks that the job ends up with a worker that has ports, worker number 11 and a total
 * resubmit count of 1.
 */
@Test
public void testBootstrapJobClusterAndJobsWithCorruptedWorkerPorts() throws IOException, io.mantisrx.server.master.persistence.exceptions.InvalidJobException {
    TestKit probe = new TestKit(system);
    JobTestHelper.deleteAllFiles();
    MantisJobStore jobStore = new MantisJobStore(new MantisStorageProviderAdapter(new io.mantisrx.server.master.store.SimpleCachedFileStorageProvider(), eventPublisher));
    MantisJobStore jobStoreSpied = Mockito.spy(jobStore);
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    ActorRef jobClusterManagerActor = system.actorOf(JobClustersManagerActor.props(jobStoreSpied, eventPublisher));
    jobClusterManagerActor.tell(new JobClusterManagerProto.JobClustersManagerInitialize(schedulerMock, false), probe.getRef());
    probe.expectMsgClass(Duration.of(10, ChronoUnit.MINUTES), JobClustersManagerInitializeResponse.class);
    String jobClusterName = "testBootStrapJobClustersAndJobs1";
    WorkerMigrationConfig migrationConfig = new WorkerMigrationConfig(MigrationStrategyEnum.PERCENTAGE, "{\"percentToMove\":60, \"intervalMs\":30000}");
    createJobClusterAndAssert(jobClusterManagerActor, jobClusterName, migrationConfig);
    submitJobAndAssert(jobClusterManagerActor, jobClusterName);
    String jobId = "testBootStrapJobClustersAndJobs1-1";
    WorkerId workerId = new WorkerId(jobId, 0, 1);
    WorkerEvent launchedEvent = new WorkerLaunched(workerId, 0, "host1", "vm1", empty(), new WorkerPorts(Lists.newArrayList(8000, 9000, 9010, 9020, 9030)));
    jobClusterManagerActor.tell(launchedEvent, probe.getRef());
    WorkerEvent startInitEvent = new WorkerStatus(new Status(workerId.getJobId(), 1, workerId.getWorkerIndex(), workerId.getWorkerNum(), TYPE.INFO, "test START_INIT", MantisJobState.StartInitiated));
    jobClusterManagerActor.tell(startInitEvent, probe.getRef());
    WorkerEvent heartBeat = new WorkerHeartbeat(new Status(jobId, 1, workerId.getWorkerIndex(), workerId.getWorkerNum(), TYPE.HEARTBEAT, "", MantisJobState.Started));
    jobClusterManagerActor.tell(heartBeat, probe.getRef());
    // get Job status
    jobClusterManagerActor.tell(new GetJobDetailsRequest("user", JobId.fromId(jobId).get()), probe.getRef());
    GetJobDetailsResponse resp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
    // Ensure it's launched
    assertEquals(SUCCESS, resp2.responseCode);
    // Overwrite the stored worker with a copy that has no ports: this is the "corruption"
    JobWorker worker = new JobWorker.Builder().withWorkerIndex(0).withWorkerNumber(1).withJobId(jobId).withStageNum(1).withNumberOfPorts(5).withWorkerPorts(null).withState(WorkerState.Started).withLifecycleEventsPublisher(eventPublisher).build();
    jobStoreSpied.updateWorker(worker.getMetadata());
    // Stop job cluster Manager Actor
    system.stop(jobClusterManagerActor);
    // create new instance
    jobClusterManagerActor = system.actorOf(JobClustersManagerActor.props(jobStoreSpied, eventPublisher));
    // initialize it
    jobClusterManagerActor.tell(new JobClusterManagerProto.JobClustersManagerInitialize(schedulerMock, true), probe.getRef());
    JobClustersManagerInitializeResponse initializeResponse = probe.expectMsgClass(JobClustersManagerInitializeResponse.class);
    assertEquals(SUCCESS, initializeResponse.responseCode);
    WorkerId newWorkerId = new WorkerId(jobId, 0, 11);
    launchedEvent = new WorkerLaunched(newWorkerId, 0, "host1", "vm1", empty(), new WorkerPorts(Lists.newArrayList(8000, 9000, 9010, 9020, 9030)));
    jobClusterManagerActor.tell(launchedEvent, probe.getRef());
    // Get Cluster Config
    jobClusterManagerActor.tell(new GetJobClusterRequest(jobClusterName), probe.getRef());
    GetJobClusterResponse clusterResponse = probe.expectMsgClass(GetJobClusterResponse.class);
    assertEquals(SUCCESS, clusterResponse.responseCode);
    assertTrue(clusterResponse.getJobCluster().isPresent());
    WorkerMigrationConfig mConfig = clusterResponse.getJobCluster().get().getMigrationConfig();
    assertEquals(migrationConfig.getStrategy(), mConfig.getStrategy());
    // compare against the config read back from the cluster, not against itself
    assertEquals(migrationConfig.getConfigString(), mConfig.getConfigString());
    // get Job status
    jobClusterManagerActor.tell(new GetJobDetailsRequest("user", JobId.fromId(jobId).get()), probe.getRef());
    resp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
    // Ensure it's launched
    assertEquals(SUCCESS, resp2.responseCode);
    assertEquals(JobState.Launched, resp2.getJobMetadata().get().getState());
    IMantisWorkerMetadata mantisWorkerMetadata = resp2.getJobMetadata().get().getWorkerByIndex(1, 0).get().getMetadata();
    assertNotNull(mantisWorkerMetadata.getWorkerPorts());
    assertEquals(11, mantisWorkerMetadata.getWorkerNumber());
    assertEquals(1, mantisWorkerMetadata.getTotalResubmitCount());
    jobClusterManagerActor.tell(new GetLastSubmittedJobIdStreamRequest(jobClusterName), probe.getRef());
    GetLastSubmittedJobIdStreamResponse lastSubmittedJobIdStreamResponse = probe.expectMsgClass(Duration.of(10, ChronoUnit.MINUTES), GetLastSubmittedJobIdStreamResponse.class);
    lastSubmittedJobIdStreamResponse.getjobIdBehaviorSubject().get().take(1).toBlocking().subscribe((jId) -> {
        assertEquals(new JobId(jobClusterName, 1), jId);
    });
    // Two schedules: one for the initial success, one for a resubmit from corrupted worker ports.
    verify(schedulerMock, times(2)).scheduleWorker(any());
    // One unschedule from corrupted worker ID 1 (before the resubmit).
    verify(schedulerMock, times(1)).unscheduleAndTerminateWorker(eq(workerId), any());
    try {
        Mockito.verify(jobStoreSpied).loadAllArchivedJobsAsync();
        Mockito.verify(jobStoreSpied).loadAllActiveJobs();
        Mockito.verify(jobStoreSpied).loadAllCompletedJobs();
        Mockito.verify(jobStoreSpied).archiveWorker(any());
    } catch (IOException e) {
        e.printStackTrace();
        fail();
    }
}
Also used : GetJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterResponse) GetLastSubmittedJobIdStreamResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLastSubmittedJobIdStreamResponse) ActorRef(akka.actor.ActorRef) GetJobDetailsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsRequest) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) WorkerMigrationConfig(io.mantisrx.runtime.WorkerMigrationConfig) GetJobDetailsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse) WorkerHeartbeat(io.mantisrx.master.jobcluster.job.worker.WorkerHeartbeat) WorkerEvent(io.mantisrx.server.master.scheduler.WorkerEvent) WorkerStatus(io.mantisrx.master.jobcluster.job.worker.WorkerStatus) IMantisWorkerMetadata(io.mantisrx.master.jobcluster.job.worker.IMantisWorkerMetadata) GetJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterRequest) WorkerLaunched(io.mantisrx.server.master.scheduler.WorkerLaunched) JobId(io.mantisrx.server.master.domain.JobId) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) JobClustersManagerInitializeResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.JobClustersManagerInitializeResponse) Status(io.mantisrx.server.core.Status) WorkerStatus(io.mantisrx.master.jobcluster.job.worker.WorkerStatus) TestKit(akka.testkit.javadsl.TestKit) IOException(java.io.IOException) WorkerId(io.mantisrx.server.core.domain.WorkerId) JobWorker(io.mantisrx.master.jobcluster.job.worker.JobWorker) MantisStorageProviderAdapter(io.mantisrx.server.master.persistence.MantisStorageProviderAdapter) GetLastSubmittedJobIdStreamRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLastSubmittedJobIdStreamRequest) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) WorkerPorts(io.mantisrx.common.WorkerPorts) Test(org.junit.Test)

Example 10 with WorkerPorts

use of io.mantisrx.common.WorkerPorts in project mantis by Netflix.

the class FakeMantisScheduler method scheduleWorker.

/**
 * Fakes scheduling a worker by sending three events to the job cluster manager actor, in order:
 * a {@code WorkerLaunched} event, a "start initiated" status, and a heartbeat reporting
 * {@code Started}. All are sent with no sender.
 *
 * @param scheduleRequest supplies the worker id, stage number and preferred cluster
 */
@Override
public void scheduleWorker(final ScheduleRequest scheduleRequest) {
    // 1. worker launched on a fixed fake host with fixed ports
    final WorkerPorts fakePorts = new WorkerPorts(Lists.newArrayList(8000, 9000, 9010, 9020, 9030));
    final WorkerEvent launched = new WorkerLaunched(
            scheduleRequest.getWorkerId(),
            scheduleRequest.getStageNum(),
            "host1",
            "vm1",
            scheduleRequest.getPreferredCluster(),
            fakePorts);
    jobClusterManagerActor.tell(launched, ActorRef.noSender());

    // 2. fake "start initiated" status
    final Status startInitStatus = new Status(
            scheduleRequest.getWorkerId().getJobId(),
            scheduleRequest.getStageNum(),
            scheduleRequest.getWorkerId().getWorkerIndex(),
            scheduleRequest.getWorkerId().getWorkerNum(),
            Status.TYPE.INFO,
            "fake Start Initiated",
            MantisJobState.StartInitiated);
    final WorkerEvent startInitiated = new WorkerStatus(startInitStatus);
    jobClusterManagerActor.tell(startInitiated, ActorRef.noSender());

    // 3. fake heartbeat reporting the worker as started
    final Status heartbeatStatus = new Status(
            scheduleRequest.getWorkerId().getJobId(),
            scheduleRequest.getStageNum(),
            scheduleRequest.getWorkerId().getWorkerIndex(),
            scheduleRequest.getWorkerId().getWorkerNum(),
            Status.TYPE.HEARTBEAT,
            "fake heartbeat event",
            MantisJobState.Started);
    final WorkerEvent heartbeat = new WorkerHeartbeat(heartbeatStatus);
    jobClusterManagerActor.tell(heartbeat, ActorRef.noSender());
}
Also used : WorkerResourceStatus(io.mantisrx.server.master.scheduler.WorkerResourceStatus) WorkerStatus(io.mantisrx.master.jobcluster.job.worker.WorkerStatus) Status(io.mantisrx.server.core.Status) WorkerHeartbeat(io.mantisrx.master.jobcluster.job.worker.WorkerHeartbeat) WorkerEvent(io.mantisrx.server.master.scheduler.WorkerEvent) WorkerPorts(io.mantisrx.common.WorkerPorts) WorkerStatus(io.mantisrx.master.jobcluster.job.worker.WorkerStatus) WorkerLaunched(io.mantisrx.server.master.scheduler.WorkerLaunched)

Aggregations

WorkerPorts (io.mantisrx.common.WorkerPorts)13 JobWorker (io.mantisrx.master.jobcluster.job.worker.JobWorker)6 WorkerLaunched (io.mantisrx.server.master.scheduler.WorkerLaunched)5 Test (org.junit.Test)5 IMantisWorkerMetadata (io.mantisrx.master.jobcluster.job.worker.IMantisWorkerMetadata)4 WorkerEvent (io.mantisrx.server.master.scheduler.WorkerEvent)4 ArrayList (java.util.ArrayList)4 WorkerHeartbeat (io.mantisrx.master.jobcluster.job.worker.WorkerHeartbeat)3 WorkerStatus (io.mantisrx.master.jobcluster.job.worker.WorkerStatus)3 StageSchedulingInfo (io.mantisrx.runtime.descriptor.StageSchedulingInfo)3 Status (io.mantisrx.server.core.Status)3 ActorRef (akka.actor.ActorRef)2 TestKit (akka.testkit.javadsl.TestKit)2 Endpoint (io.mantisrx.common.network.Endpoint)2 JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)2 GetJobClusterRequest (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterRequest)2 GetJobClusterResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterResponse)2 GetJobDetailsRequest (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsRequest)2 GetJobDetailsResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse)2 GetLastSubmittedJobIdStreamRequest (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLastSubmittedJobIdStreamRequest)2