Search in sources :

Example 1 with IJobClusterDefinition

use of io.mantisrx.server.master.domain.IJobClusterDefinition in project mantis by Netflix.

the class JobClusterActor method getResolvedJobDefinition.

/**
 * Picks the job definition to submit and hands it to the job definition resolver.
 * <ol>
 *   <li>A definition supplied by the caller is used as is (a warning is logged when it carries
 *       scheduling info and asks for the inherit-instance check).</li>
 *   <li>Without a supplied definition (quick submit) the definition is cloned from the completed
 *       jobs list; if none is available it is built from the cluster definition and its config.</li>
 * </ol>
 * The chosen definition is then passed, together with the cluster metadata, to
 * {@code jobDefinitionResolver}, whose result is returned.
 *
 * @param user submitter
 * @param givenJobDefnOp job definition provided by the user in the job submit, if any
 * @return job definition to be used by the actual submit
 * @throws Exception if no definition was supplied and none could be derived from a previous
 *                   submission or from the cluster definition
 */
private JobDefinition getResolvedJobDefinition(final String user, final Optional<JobDefinition> givenJobDefnOp) throws Exception {
    final JobDefinition candidate;
    if (!givenJobDefnOp.isPresent()) {
        // Quick submit: nothing was supplied, so inherit from the last submitted job.
        // Requests inheriting from non-terminal jobs have been sent to the job actor instead.
        final Optional<JobDefinition> archivedDefnOp =
                cloneJobDefinitionForQuickSubmitFromArchivedJobs(jobManager.getCompletedJobsList(), empty(), jobStore);
        if (archivedDefnOp.isPresent()) {
            logger.info("Inherited scheduling Info and parameters from previous job");
            candidate = archivedDefnOp.get();
        } else {
            final boolean clusterConfigAvailable = this.jobClusterMetadata != null
                    && this.jobClusterMetadata.getJobClusterDefinition() != null
                    && this.jobClusterMetadata.getJobClusterDefinition().getJobClusterConfig() != null;
            if (!clusterConfigAvailable) {
                throw new Exception("Job Definition could not retrieved from a previous submission (There may " + "not be a previous submission)");
            }
            logger.info("No previous job definition found. Fall back to cluster definition: {}", this.name);
            final IJobClusterDefinition clusterDefn = this.jobClusterMetadata.getJobClusterDefinition();
            final JobClusterConfig latestConfig = this.jobClusterMetadata.getJobClusterDefinition().getJobClusterConfig();
            candidate = new JobDefinition.Builder()
                    .withJobSla(new JobSla.Builder().build())
                    .withArtifactName(latestConfig.getArtifactName())
                    .withVersion(latestConfig.getVersion())
                    .withLabels(clusterDefn.getLabels())
                    .withName(this.name)
                    .withParameters(clusterDefn.getParameters())
                    .withSchedulingInfo(latestConfig.getSchedulingInfo())
                    .withUser(user)
                    .build();
            logger.info("Built job definition from cluster definition: {}", candidate);
        }
    } else {
        final JobDefinition supplied = givenJobDefnOp.get();
        if (supplied.getSchedulingInfo() != null && supplied.requireInheritInstanceCheck()) {
            logger.warn("Job requires inheriting instance count but has no active non-terminal job.");
        }
        candidate = supplied;
    }
    logger.info("Resolved JobDefn {}", candidate);
    return this.jobDefinitionResolver.getResolvedJobDefinition(user, candidate, this.jobClusterMetadata);
}
Also used : IJobClusterDefinition(io.mantisrx.server.master.domain.IJobClusterDefinition) JobClusterConfig(io.mantisrx.server.master.domain.JobClusterConfig) JobDefinition(io.mantisrx.server.master.domain.JobDefinition) TriggerNotFoundException(com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException) SchedulerException(com.netflix.fenzo.triggers.exceptions.SchedulerException) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException)

Example 2 with IJobClusterDefinition

use of io.mantisrx.server.master.domain.IJobClusterDefinition in project mantis by Netflix.

the class JobClusterActor method onJobClusterUpdate.

/**
 * Handles a job cluster update request.
 * <p>
 * The request is rejected with {@code CLIENT_ERROR} when the artifact version it carries is not
 * unique among the cluster's existing configs. Otherwise the requested definition is merged with
 * the current one, the disabled flag and last job count are carried over, and the result is
 * saved. The sender receives {@code SUCCESS}, or {@code SERVER_ERROR} if saving throws.
 *
 * @param request update request holding the new job cluster definition
 */
@Override
public void onJobClusterUpdate(final UpdateJobClusterRequest request) {
    final String name = request.getJobClusterDefinition().getName();
    final ActorRef sender = getSender();
    String givenArtifactVersion = request.getJobClusterDefinition().getJobClusterConfig().getVersion();
    if (!isVersionUnique(givenArtifactVersion, jobClusterMetadata.getJobClusterDefinition().getJobClusterConfigs())) {
        String msg = String.format("Job cluster %s not updated as the version %s is not unique", name, givenArtifactVersion);
        logger.error(msg);
        sender.tell(new UpdateJobClusterResponse(request.requestId, CLIENT_ERROR, msg), getSelf());
        return;
    }
    IJobClusterDefinition currentJobClusterDefinition = jobClusterMetadata.getJobClusterDefinition();
    JobClusterDefinitionImpl mergedJobClusterDefinition = new JobClusterDefinitionImpl.Builder().mergeConfigsAndOverrideRest(currentJobClusterDefinition, request.getJobClusterDefinition()).build();
    // Only the definition changes; the disabled flag and job counter are preserved.
    IJobClusterMetadata jobCluster = new JobClusterMetadataImpl.Builder().withIsDisabled(jobClusterMetadata.isDisabled()).withLastJobCount(jobClusterMetadata.getLastJobCount()).withJobClusterDefinition(mergedJobClusterDefinition).build();
    try {
        updateAndSaveJobCluster(jobCluster);
        sender.tell(new UpdateJobClusterResponse(request.requestId, SUCCESS, name + " Job cluster updated"), getSelf());
        numJobClusterUpdate.increment();
    } catch (Exception e) {
        // Log the cluster name and the cause; the previous message said "not created"
        // and dropped the exception, which made update failures hard to diagnose.
        logger.error("Job cluster {} not updated", name, e);
        sender.tell(new UpdateJobClusterResponse(request.requestId, SERVER_ERROR, name + " Job cluster updation failed " + e.getMessage()), getSelf());
        numJobClusterUpdateErrors.increment();
    }
}
Also used : UpdateJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterResponse) ActorRef(akka.actor.ActorRef) IJobClusterDefinition(io.mantisrx.server.master.domain.IJobClusterDefinition) JobClusterDefinitionImpl(io.mantisrx.server.master.domain.JobClusterDefinitionImpl) TriggerNotFoundException(com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException) SchedulerException(com.netflix.fenzo.triggers.exceptions.SchedulerException) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException)

Example 3 with IJobClusterDefinition

use of io.mantisrx.server.master.domain.IJobClusterDefinition in project mantis by Netflix.

the class JobTestLifecycle method testJobSubmitInitalizationFails.

/**
 * Verifies job actor initialization when the job store fails on {@code storeNewJob}:
 * the actor is expected to answer {@code InitJob} with {@code SERVER_ERROR}, and a subsequent
 * job details request is expected to yield {@code CLIENT_ERROR}.
 */
@Test
public void testJobSubmitInitalizationFails() {
    final TestKit probe = new TestKit(system);
    String clusterName = "testJobSubmitPersistenceFails";
    IJobClusterDefinition jobClusterDefn = JobTestHelper.generateJobClusterDefinition(clusterName);
    JobDefinition jobDefn;
    try {
        jobDefn = JobTestHelper.generateJobDefinition(clusterName);
        MantisScheduler schedulerMock = mock(MantisScheduler.class);
        MantisJobStore jobStoreMock = mock(MantisJobStore.class);
        // Make persisting the new job fail so that initialization cannot complete.
        Mockito.doThrow(IOException.class).when(jobStoreMock).storeNewJob(any());
        MantisJobMetadataImpl mantisJobMetaData = new MantisJobMetadataImpl.Builder().withJobId(new JobId(clusterName, 1)).withSubmittedAt(Instant.now()).withJobState(JobState.Accepted).withNextWorkerNumToUse(1).withJobDefinition(jobDefn).build();
        final ActorRef jobActor = system.actorOf(JobActor.props(jobClusterDefn, mantisJobMetaData, jobStoreMock, schedulerMock, eventPublisher));
        jobActor.tell(new JobProto.InitJob(probe.getRef()), probe.getRef());
        JobProto.JobInitialized initMsg = probe.expectMsgClass(JobProto.JobInitialized.class);
        assertEquals(SERVER_ERROR, initMsg.responseCode);
        System.out.println(initMsg.message);
        String jobId = clusterName + "-1";
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp + " msg " + resp.message);
        assertEquals(CLIENT_ERROR, resp.responseCode);
    } catch (Exception e) {
        // A single handler covers InvalidJobException too; report the cause in the failure
        // message so it shows up in the test report and not only on stderr.
        e.printStackTrace();
        fail("Unexpected exception: " + e);
    }
}
Also used : ActorRef(akka.actor.ActorRef) JobProto(io.mantisrx.master.jobcluster.proto.JobProto) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) TestKit(akka.testkit.javadsl.TestKit) InvalidJobException(io.mantisrx.runtime.command.InvalidJobException) IOException(java.io.IOException) GetJobDetailsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) IJobClusterDefinition(io.mantisrx.server.master.domain.IJobClusterDefinition) InvalidJobException(io.mantisrx.runtime.command.InvalidJobException) JobDefinition(io.mantisrx.server.master.domain.JobDefinition) JobId(io.mantisrx.server.master.domain.JobId) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) Test(org.junit.Test)

Example 4 with IJobClusterDefinition

use of io.mantisrx.server.master.domain.IJobClusterDefinition in project mantis by Netflix.

the class SimpleCachedFileStorageProviderTest method testCreateJob.

/**
 * Stores a new job, its stages and their workers through
 * {@code SimpleCachedFileStorageProvider}, loads the job back with {@code loadActiveJob}
 * and compares the loaded metadata with the original via {@code isEqual}.
 */
@Test
public void testCreateJob() {
    String clusterName = "testCreateJob";
    SimpleCachedFileStorageProvider sProvider = new SimpleCachedFileStorageProvider();
    IJobClusterDefinition jobClusterDefn = JobTestHelper.generateJobClusterDefinition(clusterName);
    JobDefinition jobDefinition;
    try {
        jobDefinition = JobTestHelper.generateJobDefinition(clusterName);
        JobId jobId = JobId.fromId(clusterName + "-1").get();
        IMantisJobMetadata mantisJobMetaData = new MantisJobMetadataImpl.Builder().withJobId(jobId).withSubmittedAt(Instant.now()).withJobState(JobState.Accepted).withNextWorkerNumToUse(1).withJobDefinition(jobDefinition).build();
        sProvider.storeNewJob(mantisJobMetaData);
        SchedulingInfo schedInfo = jobDefinition.getSchedulingInfo();
        int numStages = schedInfo.getStages().size();
        // Stages are looked up by number, starting at 1.
        for (int s = 1; s <= numStages; s++) {
            StageSchedulingInfo stage = schedInfo.getStages().get(s);
            // Use the computed stage count instead of a hard-coded 1.
            IMantisStageMetadata msmd = new MantisStageMetadataImpl.Builder().withJobId(jobId).withStageNum(s).withNumStages(numStages).withMachineDefinition(stage.getMachineDefinition()).withNumWorkers(stage.getNumberOfInstances()).withHardConstraints(stage.getHardConstraints()).withSoftConstraints(stage.getSoftConstraints()).withScalingPolicy(stage.getScalingPolicy()).isScalable(stage.getScalable()).build();
            ((MantisJobMetadataImpl) mantisJobMetaData).addJobStageIfAbsent(msmd);
            sProvider.updateMantisStage(msmd);
            for (int w = 0; w < stage.getNumberOfInstances(); w++) {
                // The worker belongs to the stage being iterated (s); previously the stage number
                // was derived from the worker index (w + 1) and the worker was always attached to
                // stage 1, which only matches for a single-stage, single-worker fixture.
                // NOTE(review): every worker still gets worker number 1 - confirm this is intended
                // if the fixture ever has more than one worker.
                JobWorker mwmd = new JobWorker.Builder().withJobId(jobId).withWorkerIndex(w).withWorkerNumber(1).withNumberOfPorts(stage.getMachineDefinition().getNumPorts() + MANTIS_SYSTEM_ALLOCATED_NUM_PORTS).withStageNum(s).withLifecycleEventsPublisher(eventPublisher).build();
                ((MantisJobMetadataImpl) mantisJobMetaData).addWorkerMetadata(s, mwmd);
                sProvider.storeWorker(mwmd.getMetadata());
            }
        }
        Optional<IMantisJobMetadata> loadedJobMetaOp = sProvider.loadActiveJob(jobId.getId());
        assertTrue(loadedJobMetaOp.isPresent());
        IMantisJobMetadata loadedJobMeta = loadedJobMetaOp.get();
        System.out.println("Original Job -> " + mantisJobMetaData);
        System.out.println("Loaded Job ->" + loadedJobMeta);
        isEqual(mantisJobMetaData, loadedJobMeta);
    } catch (Exception e) {
        e.printStackTrace();
        fail();
    }
}
Also used : StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) IMantisJobMetadata(io.mantisrx.master.jobcluster.job.IMantisJobMetadata) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException) IOException(java.io.IOException) JobWorker(io.mantisrx.master.jobcluster.job.worker.JobWorker) IJobClusterDefinition(io.mantisrx.server.master.domain.IJobClusterDefinition) StageSchedulingInfo(io.mantisrx.runtime.descriptor.StageSchedulingInfo) IMantisStageMetadata(io.mantisrx.master.jobcluster.job.IMantisStageMetadata) MantisJobMetadataImpl(io.mantisrx.master.jobcluster.job.MantisJobMetadataImpl) JobDefinition(io.mantisrx.server.master.domain.JobDefinition) JobId(io.mantisrx.server.master.domain.JobId) Test(org.junit.Test)

Example 5 with IJobClusterDefinition

use of io.mantisrx.server.master.domain.IJobClusterDefinition in project mantis by Netflix.

the class SimpleCachedFileStorageProviderTest method serde.

/**
 * Writes a fake job cluster definition to a spool file with {@code mapper} and reads it back
 * as a {@code JobClusterDefinitionImpl}, printing the name that was read.
 * Currently disabled (the {@code @Test} annotation is commented out).
 *
 * @throws IOException if the spool directory or file cannot be created, or writing fails
 */
// @Test
public void serde() throws IOException {
    String clusterName = "testCreateClusterClueter";
    File tmpFile = new File("/tmp/MantisSpool/jobClusters" + "/" + clusterName);
    // createNewFile() throws when the parent directory is missing, so create it first.
    File spoolDir = tmpFile.getParentFile();
    if (!spoolDir.isDirectory() && !spoolDir.mkdirs()) {
        throw new IOException("Unable to create directory " + spoolDir);
    }
    tmpFile.createNewFile();
    IJobClusterDefinition jobClusterDefn = createFakeJobClusterDefn(clusterName, Lists.newArrayList());
    // Close the writer here rather than relying on the mapper to close it,
    // so the content is flushed to disk before it is read back.
    try (PrintWriter pwrtr = new PrintWriter(tmpFile)) {
        mapper.writeValue(pwrtr, jobClusterDefn);
    }
    try (FileInputStream fis = new FileInputStream(tmpFile)) {
        IJobClusterDefinition jobClustermeta = mapper.readValue(fis, JobClusterDefinitionImpl.class);
        System.out.println("read: " + jobClustermeta.getName());
    } catch (Exception e) {
        // Read failures are only reported, not propagated (kept from the original).
        e.printStackTrace();
    }
}
Also used : IJobClusterDefinition(io.mantisrx.server.master.domain.IJobClusterDefinition) File(java.io.File) FileInputStream(java.io.FileInputStream) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException) IOException(java.io.IOException) PrintWriter(java.io.PrintWriter)

Aggregations

IJobClusterDefinition (io.mantisrx.server.master.domain.IJobClusterDefinition)16 JobDefinition (io.mantisrx.server.master.domain.JobDefinition)14 ActorRef (akka.actor.ActorRef)13 JobId (io.mantisrx.server.master.domain.JobId)13 Test (org.junit.Test)12 TestKit (akka.testkit.javadsl.TestKit)11 JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)11 JobProto (io.mantisrx.master.jobcluster.proto.JobProto)11 MantisJobStore (io.mantisrx.server.master.persistence.MantisJobStore)11 MantisScheduler (io.mantisrx.server.master.scheduler.MantisScheduler)11 IOException (java.io.IOException)11 InvalidJobException (io.mantisrx.runtime.command.InvalidJobException)10 WorkerId (io.mantisrx.server.core.domain.WorkerId)10 GetJobDetailsResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse)9 SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo)8 MachineDefinition (io.mantisrx.runtime.MachineDefinition)7 StageSchedulingInfo (io.mantisrx.runtime.descriptor.StageSchedulingInfo)7 JobClusterAlreadyExistsException (io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException)4 SchedulerException (com.netflix.fenzo.triggers.exceptions.SchedulerException)2 TriggerNotFoundException (com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException)2