Search in sources :

Example 1 with IMantisStageMetadata

use of io.mantisrx.master.jobcluster.job.IMantisStageMetadata in project mantis by Netflix.

the class NoOpMantisJobOperations method convertMantisJobWriteableToMantisJobMetadata.

// TODO job specific migration config is not supported, migration config will be at cluster level
/**
 * Converts a job held in the writable store representation into an {@link IMantisJobMetadata}.
 *
 * <p>The stages are converted first, a {@link SchedulingInfo} and {@link JobDefinition} are
 * derived from them, and finally the converted stages are attached to a freshly built
 * {@link MantisJobMetadataImpl}.
 *
 * @param archJob        job to convert; it is cast to {@code MantisJobMetadataWritable} to read its stages
 * @param eventPublisher publisher handed to the stage conversion
 * @param isArchived     when {@code true}, workers are not added to the converted stages
 * @return the converted job metadata with all converted stages attached
 * @throws IllegalArgumentException if the job id of {@code archJob} cannot be parsed by {@code JobId.fromId}
 * @throws Exception                declared by the signature; presumably propagated from the conversion helpers
 */
public static IMantisJobMetadata convertMantisJobWriteableToMantisJobMetadata(MantisJobMetadata archJob, LifecycleEventPublisher eventPublisher, boolean isArchived) throws Exception {
    if (logger.isTraceEnabled()) {
        logger.trace("DataFormatAdapter:Converting {}", archJob);
    }

    // For an archived job, adding workers by index may fail because several workers can map to
    // the same index, so workers are left out of the converted stages in that case.
    final boolean skipAddingWorkers = isArchived;

    // Convert every stage to the new format.
    final List<IMantisStageMetadata> stages = new ArrayList<>();
    final MantisJobMetadataWritable writableJob = (MantisJobMetadataWritable) archJob;
    for (MantisStageMetadata legacyStage : writableJob.getStageMetadata()) {
        stages.add(convertMantisStageMetadataWriteableToMantisStageMetadata(legacyStage, eventPublisher, skipAddingWorkers));
    }

    // Derive the scheduling info and the artifact name needed by the job definition.
    final SchedulingInfo schedulingInfo = generateSchedulingInfo(stages);
    final String artifactName = extractArtifactName(archJob.getJarUrl()).orElse("");

    final JobDefinition jobDefn = new JobDefinition(
            archJob.getName(),
            archJob.getUser(),
            artifactName,
            null,
            archJob.getParameters(),
            archJob.getSla(),
            archJob.getSubscriptionTimeoutSecs(),
            schedulingInfo,
            archJob.getNumStages(),
            archJob.getLabels(),
            null);

    // An unparseable job id is rejected only after the job definition has been built.
    final JobId jobId = JobId.fromId(archJob.getJobId())
            .orElseThrow(() -> new IllegalArgumentException("Invalid JobId " + archJob.getJobId()));

    final MantisJobMetadataImpl mantisJobMetadata = new MantisJobMetadataImpl(
            jobId,
            archJob.getSubmittedAt(),
            archJob.getStartedAt(),
            jobDefn,
            convertMantisJobStateToJobState(archJob.getState()),
            archJob.getNextWorkerNumberToUse());

    // Attach the converted stages to the new job metadata.
    for (IMantisStageMetadata stage : stages) {
        mantisJobMetadata.addJobStageIfAbsent(stage);
    }

    if (logger.isTraceEnabled()) {
        logger.trace("DataFormatAdapter:Completed conversion to IMantisJobMetadata {}", mantisJobMetadata);
    }
    return mantisJobMetadata;
}
Also used : StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) ArrayList(java.util.ArrayList) URL(java.net.URL) FilterableMantisJobMetadataWritable(io.mantisrx.master.jobcluster.job.FilterableMantisJobMetadataWritable) MantisJobMetadataWritable(io.mantisrx.server.master.store.MantisJobMetadataWritable) IMantisStageMetadata(io.mantisrx.master.jobcluster.job.IMantisStageMetadata) MantisStageMetadata(io.mantisrx.server.master.store.MantisStageMetadata) IMantisStageMetadata(io.mantisrx.master.jobcluster.job.IMantisStageMetadata) MantisJobMetadataImpl(io.mantisrx.master.jobcluster.job.MantisJobMetadataImpl) MantisJobDefinition(io.mantisrx.runtime.MantisJobDefinition) NamedJobDefinition(io.mantisrx.runtime.NamedJobDefinition)

Example 2 with IMantisStageMetadata

use of io.mantisrx.master.jobcluster.job.IMantisStageMetadata in project mantis by Netflix.

the class JobClusterTest method testLostWorkerGetsReplaced.

/**
 * Submits a job to a freshly initialized job cluster actor, brings its worker up, terminates
 * that worker and then brings up a replacement worker, verifying along the way that the job
 * reaches {@code JobState.Launched}, that the scheduler was asked to schedule twice, that the
 * job store archived a worker, and that the cluster still lists exactly one job.
 *
 * <p>Any exception fails the test; the job cluster actor is always stopped at the end.
 */
@Test
public void testLostWorkerGetsReplaced() {
    TestKit probe = new TestKit(system);
    String clusterName = "testLostWorkerGetsReplaced";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    // MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    // A spy (not a mock) is used so calls reach the shared jobStore while still being verifiable.
    MantisJobStore jobStoreSpied = Mockito.spy(jobStore);
    final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName);
    ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreSpied, schedulerMock, eventPublisher));
    // Initialize the cluster and wait for the acknowledgement before submitting anything.
    jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
    JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
    assertEquals(SUCCESS, createResp.responseCode);
    try {
        final JobDefinition jobDefn = createJob(clusterName, 1, MantisJobDurationType.Transient);
        // The id checked after submission is the cluster name suffixed with "-1".
        String jobId = clusterName + "-1";
        JobTestHelper.submitJobAndVerifySuccess(probe, clusterName, jobClusterActor, jobDefn, jobId);
        // JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, jobId, SUCCESS, JobState.Accepted);
        // JobTestHelper.killJobAndVerify(probe, clusterName, new JobId(clusterName, 1), jobClusterActor);
        verify(jobStoreSpied, times(1)).createJobCluster(any());
        verify(jobStoreSpied, times(1)).updateJobCluster(any());
        int stageNo = 1;
        // identify the initial worker (presumably worker index 0, worker number 1 - confirm against WorkerId)
        WorkerId workerId = new WorkerId(jobId, 0, 1);
        // send the launched / initiated / started events for the initial worker
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobClusterActor, jobId, stageNo, workerId);
        // check job status again
        jobClusterActor.tell(new GetJobDetailsRequest("nj", jobId), probe.getRef());
        // jobActor.tell(new JobProto.InitJob(probe.getRef()), probe.getRef());
        GetJobDetailsResponse resp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp2 + " msg " + resp2.message);
        assertEquals(SUCCESS, resp2.responseCode);
        // Job started
        assertEquals(JobState.Launched, resp2.getJobMetadata().get().getState());
        // the initial worker (the one identified by workerId above) gets terminated
        JobTestHelper.sendWorkerTerminatedEvent(probe, jobClusterActor, jobId, workerId);
        // replaced worker comes up and sends events
        WorkerId workerId2_replaced = new WorkerId(jobId, 0, 2);
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobClusterActor, jobId, stageNo, workerId2_replaced);
        jobClusterActor.tell(new GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp4 = probe.expectMsgClass(GetJobDetailsResponse.class);
        IMantisJobMetadata jobMeta = resp4.getJobMetadata().get();
        Map<Integer, ? extends IMantisStageMetadata> stageMetadata = jobMeta.getStageMetadata();
        IMantisStageMetadata stage = stageMetadata.get(1);
        // Diagnostic output only: nothing is asserted on the individual workers of stage 1.
        for (JobWorker worker : stage.getAllWorkers()) {
            System.out.println("worker -> " + worker.getMetadata());
        }
        // scheduleWorker must have been called exactly twice within 1 second
        // NOTE(review): presumably 1 initial schedule plus 1 replacement; the earlier wording
        // "2 initial schedules and 1 replacement" did not match times(2) - confirm.
        verify(schedulerMock, timeout(1_000).times(2)).scheduleWorker(any());
        // archive worker should get called once for the dead worker
        // verify(jobStoreMock, timeout(1_000).times(1)).archiveWorker(any());
        Mockito.verify(jobStoreSpied).archiveWorker(any());
        // The cluster should still report exactly one job after the replacement.
        jobClusterActor.tell(new ListJobsRequest(), probe.getRef());
        ListJobsResponse listResp2 = probe.expectMsgClass(ListJobsResponse.class);
        assertEquals(SUCCESS, listResp2.responseCode);
        assertEquals(1, listResp2.getJobList().size());
        for (MantisJobMetadataView jb : listResp2.getJobList()) {
            System.out.println("Jb -> " + jb);
        }
    // assertEquals(jobActor, probe.getLastSender());
    } catch (InvalidJobException e) {
        // An invalid job at any point is a test failure; the stack trace is printed for diagnosis.
        e.printStackTrace();
        fail();
    } catch (Exception e) {
        // Any other exception is treated the same way.
        e.printStackTrace();
        fail();
    } finally {
        // Always stop the actor created by this test.
        system.stop(jobClusterActor);
    }
}
Also used : JobClusterProto(io.mantisrx.master.jobcluster.proto.JobClusterProto) ActorRef(akka.actor.ActorRef) GetJobDetailsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsRequest) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) ListJobsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobsResponse) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) TestKit(akka.testkit.javadsl.TestKit) Matchers.anyString(org.mockito.Matchers.anyString) WorkerId(io.mantisrx.server.core.domain.WorkerId) InvalidJobException(io.mantisrx.runtime.command.InvalidJobException) GetJobDetailsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse) JobWorker(io.mantisrx.master.jobcluster.job.worker.JobWorker) ListJobsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobsRequest) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) MantisJobMetadataView(io.mantisrx.master.jobcluster.job.MantisJobMetadataView) IMantisStageMetadata(io.mantisrx.master.jobcluster.job.IMantisStageMetadata) InvalidJobException(io.mantisrx.runtime.command.InvalidJobException) Test(org.junit.Test)

Example 3 with IMantisStageMetadata

use of io.mantisrx.master.jobcluster.job.IMantisStageMetadata in project mantis by Netflix.

the class SimpleCachedFileStorageProviderTest method testCreateJob.

/**
 * Stores a new job together with its stages and workers through a
 * {@link SimpleCachedFileStorageProvider}, loads it back as an active job and checks that the
 * loaded metadata equals what was stored.
 *
 * <p>Every worker is created for, and attached to, the stage currently being iterated, and each
 * stage records the real number of stages of the job, so the fixture stays consistent even when
 * the generated job definition has more than one stage or more than one worker per stage.
 * NOTE(review): all workers still share worker number 1, as before - confirm that is acceptable
 * if the generated definition ever has several workers.
 */
@Test
public void testCreateJob() {
    String clusterName = "testCreateJob";
    SimpleCachedFileStorageProvider sProvider = new SimpleCachedFileStorageProvider();
    IJobClusterDefinition jobClusterDefn = JobTestHelper.generateJobClusterDefinition(clusterName);
    JobDefinition jobDefinition;
    try {
        jobDefinition = JobTestHelper.generateJobDefinition(clusterName);
        JobId jobId = JobId.fromId(clusterName + "-1").get();
        IMantisJobMetadata mantisJobMetaData = new MantisJobMetadataImpl.Builder()
                .withJobId(jobId)
                .withSubmittedAt(Instant.now())
                .withJobState(JobState.Accepted)
                .withNextWorkerNumToUse(1)
                .withJobDefinition(jobDefinition)
                .build();
        sProvider.storeNewJob(mantisJobMetaData);

        SchedulingInfo schedInfo = jobDefinition.getSchedulingInfo();
        int numStages = schedInfo.getStages().size();
        // Stage numbers are 1-based keys of the scheduling info's stage map.
        for (int s = 1; s <= numStages; s++) {
            StageSchedulingInfo stage = schedInfo.getStages().get(s);
            IMantisStageMetadata msmd = new MantisStageMetadataImpl.Builder()
                    .withJobId(jobId)
                    .withStageNum(s)
                    // record the actual stage count rather than a hard-coded 1
                    .withNumStages(numStages)
                    .withMachineDefinition(stage.getMachineDefinition())
                    .withNumWorkers(stage.getNumberOfInstances())
                    .withHardConstraints(stage.getHardConstraints())
                    .withSoftConstraints(stage.getSoftConstraints())
                    .withScalingPolicy(stage.getScalingPolicy())
                    .isScalable(stage.getScalable())
                    .build();
            ((MantisJobMetadataImpl) mantisJobMetaData).addJobStageIfAbsent(msmd);
            sProvider.updateMantisStage(msmd);

            for (int w = 0; w < stage.getNumberOfInstances(); w++) {
                JobWorker mwmd = new JobWorker.Builder()
                        .withJobId(jobId)
                        .withWorkerIndex(w)
                        .withWorkerNumber(1)
                        .withNumberOfPorts(stage.getMachineDefinition().getNumPorts() + MANTIS_SYSTEM_ALLOCATED_NUM_PORTS)
                        // the worker belongs to the enclosing stage, not to "worker index + 1"
                        .withStageNum(s)
                        .withLifecycleEventsPublisher(eventPublisher)
                        .build();
                // attach the worker to the stage it was built for
                ((MantisJobMetadataImpl) mantisJobMetaData).addWorkerMetadata(s, mwmd);
                sProvider.storeWorker(mwmd.getMetadata());
            }
        }

        // Round-trip: the job just stored must be loadable and equal to the original.
        Optional<IMantisJobMetadata> loadedJobMetaOp = sProvider.loadActiveJob(jobId.getId());
        assertTrue(loadedJobMetaOp.isPresent());
        IMantisJobMetadata loadedJobMeta = loadedJobMetaOp.get();
        System.out.println("Original Job -> " + mantisJobMetaData);
        System.out.println("Loaded Job ->" + loadedJobMeta);
        isEqual(mantisJobMetaData, loadedJobMeta);
    } catch (Exception e) {
        // Any exception while building, storing or loading the job fails the test.
        e.printStackTrace();
        fail();
    }
}
Also used : StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException) IOException(java.io.IOException) JobWorker(io.mantisrx.master.jobcluster.job.worker.JobWorker) IJobClusterDefinition(io.mantisrx.server.master.domain.IJobClusterDefinition) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) IMantisStageMetadata(io.mantisrx.master.jobcluster.job.IMantisStageMetadata) MantisJobMetadataImpl(io.mantisrx.master.jobcluster.job.MantisJobMetadataImpl) JobDefinition(io.mantisrx.server.master.domain.JobDefinition) JobId(io.mantisrx.server.master.domain.JobId) Test(org.junit.Test)

Example 4 with IMantisStageMetadata

use of io.mantisrx.master.jobcluster.job.IMantisStageMetadata in project mantis by Netflix.

the class MantisJobStore method storeNewWorkers.

/**
 * Persists the given worker requests and, the first time each stage number is seen among them,
 * the metadata of that stage.
 *
 * <p>The job id used for storing the workers is taken from the first worker request.
 *
 * @param job            job the workers belong to; used to look up stage metadata by stage number
 * @param workerRequests workers to store
 * @return the stored workers in request order, or {@code null} when {@code workerRequests} is
 *         {@code null} or empty (kept as {@code null} for compatibility with existing callers)
 * @throws RuntimeException    if a worker refers to a stage number the job has no metadata for
 * @throws IOException         declared by the signature; presumably raised by the storage provider
 * @throws InvalidJobException declared by the signature; presumably raised by the storage provider
 */
public List<? extends IMantisWorkerMetadata> storeNewWorkers(IMantisJobMetadata job, List<IMantisWorkerMetadata> workerRequests) throws IOException, InvalidJobException {
    if (logger.isTraceEnabled()) {
        logger.trace("Storing new workers for  Job {} ", job);
    }
    if (workerRequests == null || workerRequests.isEmpty()) {
        return null;
    }
    String jobId = workerRequests.get(0).getJobId();
    logger.debug("Adding {} workers for job {}", workerRequests.size(), jobId);

    List<IMantisWorkerMetadata> addedWorkers = new ArrayList<>();
    List<Integer> savedStageList = Lists.newArrayList();
    for (IMantisWorkerMetadata workerRequest : workerRequests) {
        int stageNum = workerRequest.getStageNum();
        // store stage if not stored already
        if (!savedStageList.contains(stageNum)) {
            Optional<IMantisStageMetadata> stageMetadata = job.getStageMetadata(stageNum);
            if (!stageMetadata.isPresent()) {
                // String.format needs a %-style specifier; an SLF4J "{}" placeholder would be
                // emitted literally and the stage number would be lost from the message.
                throw new RuntimeException(String.format("No such stage %d", stageNum));
            }
            storageProvider.storeMantisStage(stageMetadata.get());
            savedStageList.add(stageNum);
        }
        addedWorkers.add(workerRequest);
    }
    storageProvider.storeWorkers(jobId, addedWorkers);
    if (logger.isTraceEnabled()) {
        logger.trace("Stored new workers for Job {}", addedWorkers);
    }
    return addedWorkers;
}
Also used : ArrayList(java.util.ArrayList) IMantisStageMetadata(io.mantisrx.master.jobcluster.job.IMantisStageMetadata) IMantisWorkerMetadata(io.mantisrx.master.jobcluster.job.worker.IMantisWorkerMetadata)

Example 5 with IMantisStageMetadata

use of io.mantisrx.master.jobcluster.job.IMantisStageMetadata in project mantis by Netflix.

the class SimpleCachedFileStorageProvider method loadJob.

/**
 * Loads a job, its stages and its workers from files under the given directory.
 *
 * <p>The job file is deserialized into a {@link MantisJobMetadataImpl}; every stage read for the
 * job is then attached to it, followed by every worker read for the job. A worker that is
 * rejected with an {@code InvalidJobException} is logged as a warning and skipped.
 *
 * @param dir   directory holding the job, stage and worker files
 * @param jobId id of the job to load
 * @return the loaded job, or an empty {@code Optional} when the job file does not exist
 * @throws IOException declared by the signature; the job file is opened and parsed here
 */
private Optional<IMantisJobMetadata> loadJob(String dir, String jobId) throws IOException {
    final File jobFile = new File(getJobFileName(dir, jobId));
    if (!jobFile.exists()) {
        return Optional.empty();
    }

    final MantisJobMetadataImpl loadedJob;
    try (FileInputStream in = new FileInputStream(jobFile)) {
        loadedJob = mapper.readValue(in, MantisJobMetadataImpl.class);
    }

    final File baseDir = new File(dir);
    for (IMantisStageMetadata stageMeta : readStagesFor(baseDir, jobId)) {
        loadedJob.addJobStageIfAbsent(stageMeta);
    }
    for (IMantisWorkerMetadata workerMeta : readWorkersFor(baseDir, jobId)) {
        try {
            final JobWorker rebuilt = new JobWorker.Builder()
                    .from(workerMeta)
                    .withLifecycleEventsPublisher(eventPublisher)
                    .build();
            loadedJob.addWorkerMetadata(workerMeta.getStageNum(), rebuilt);
        } catch (InvalidJobException e) {
            // Best effort: a worker that cannot be added is reported and the rest are still loaded.
            logger.warn("Unexpected error adding worker index=" + workerMeta.getWorkerIndex() + ", number=" + workerMeta.getWorkerNumber() + " for job " + jobId + ": " + e.getMessage(), e);
        }
    }
    return Optional.ofNullable(loadedJob);
}
Also used : IMantisStageMetadata(io.mantisrx.master.jobcluster.job.IMantisStageMetadata) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) IMantisWorkerMetadata(io.mantisrx.master.jobcluster.job.worker.IMantisWorkerMetadata) InvalidJobException(io.mantisrx.server.master.persistence.exceptions.InvalidJobException) MantisJobMetadataImpl(io.mantisrx.master.jobcluster.job.MantisJobMetadataImpl) File(java.io.File) FileInputStream(java.io.FileInputStream) JobWorker(io.mantisrx.master.jobcluster.job.worker.JobWorker)

Aggregations

IMantisStageMetadata (io.mantisrx.master.jobcluster.job.IMantisStageMetadata)8 IMantisJobMetadata (io.mantisrx.master.jobcluster.job.IMantisJobMetadata)3 MantisJobMetadataImpl (io.mantisrx.master.jobcluster.job.MantisJobMetadataImpl)3 JobWorker (io.mantisrx.master.jobcluster.job.worker.JobWorker)3 SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo)3 StageSchedulingInfo (io.mantisrx.runtime.descriptor.StageSchedulingInfo)3 ArrayList (java.util.ArrayList)3 Test (org.junit.Test)3 IMantisWorkerMetadata (io.mantisrx.master.jobcluster.job.worker.IMantisWorkerMetadata)2 HashMap (java.util.HashMap)2 ActorRef (akka.actor.ActorRef)1 TestKit (akka.testkit.javadsl.TestKit)1 FilterableMantisJobMetadataWritable (io.mantisrx.master.jobcluster.job.FilterableMantisJobMetadataWritable)1 MantisJobMetadataView (io.mantisrx.master.jobcluster.job.MantisJobMetadataView)1 MantisStageMetadataImpl (io.mantisrx.master.jobcluster.job.MantisStageMetadataImpl)1 GetJobDetailsRequest (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsRequest)1 GetJobDetailsResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse)1 ListJobsRequest (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobsRequest)1 ListJobsResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.ListJobsResponse)1 JobClusterProto (io.mantisrx.master.jobcluster.proto.JobClusterProto)1