Search in sources :

Example 1 with JobClusterConfig

use of io.mantisrx.server.master.domain.JobClusterConfig in project mantis by Netflix.

the class JobClusterActor method getResolvedJobDefinition.

/**
 * Picks the job definition a submit request should start from and hands it to the
 * {@code jobDefinitionResolver} for final resolution.
 * <ul>
 *   <li>Definition supplied by the caller: it is used as-is (a warning is logged when it carries
 *       scheduling info and {@code requireInheritInstanceCheck()} is true).</li>
 *   <li>No definition supplied (quick submit): a definition is cloned from the archived/completed
 *       jobs; if none is available, one is built from the cluster definition and its config.</li>
 * </ul>
 *
 * @param user submitter
 * @param givenJobDefnOp job definition provided by the user in the submit request, if any
 * @return job definition to be used by the actual submit
 * @throws Exception if no definition was supplied and neither a previous job nor a cluster
 *                   config is available to derive one from
 */
private JobDefinition getResolvedJobDefinition(final String user, final Optional<JobDefinition> givenJobDefnOp) throws Exception {
    final JobDefinition startingDefn;
    if (givenJobDefnOp.isPresent()) {
        final JobDefinition supplied = givenJobDefnOp.get();
        final boolean wantsInstanceInheritance =
                supplied.getSchedulingInfo() != null && supplied.requireInheritInstanceCheck();
        if (wantsInstanceInheritance) {
            logger.warn("Job requires inheriting instance count but has no active non-terminal job.");
        }
        startingDefn = supplied;
    } else {
        // Quick submit: nothing was specified, so inherit from the last submitted job.
        // Requests that inherit from non-terminal jobs are routed to the job actor instead.
        final Optional<JobDefinition> fromArchive =
                cloneJobDefinitionForQuickSubmitFromArchivedJobs(jobManager.getCompletedJobsList(), empty(), jobStore);
        if (fromArchive.isPresent()) {
            logger.info("Inherited scheduling Info and parameters from previous job");
            startingDefn = fromArchive.get();
        } else {
            final boolean clusterConfigAvailable = this.jobClusterMetadata != null
                    && this.jobClusterMetadata.getJobClusterDefinition() != null
                    && this.jobClusterMetadata.getJobClusterDefinition().getJobClusterConfig() != null;
            if (!clusterConfigAvailable) {
                throw new Exception("Job Definition could not retrieved from a previous submission (There may " + "not be a previous submission)");
            }
            logger.info("No previous job definition found. Fall back to cluster definition: {}", this.name);
            final IJobClusterDefinition clusterDefn = this.jobClusterMetadata.getJobClusterDefinition();
            final JobClusterConfig clusterCfg = this.jobClusterMetadata.getJobClusterDefinition().getJobClusterConfig();
            startingDefn = new JobDefinition.Builder()
                    .withJobSla(new JobSla.Builder().build())
                    .withArtifactName(clusterCfg.getArtifactName())
                    .withVersion(clusterCfg.getVersion())
                    .withLabels(clusterDefn.getLabels())
                    .withName(this.name)
                    .withParameters(clusterDefn.getParameters())
                    .withSchedulingInfo(clusterCfg.getSchedulingInfo())
                    .withUser(user)
                    .build();
            logger.info("Built job definition from cluster definition: {}", startingDefn);
        }
    }
    logger.info("Resolved JobDefn {}", startingDefn);
    return this.jobDefinitionResolver.getResolvedJobDefinition(user, startingDefn, this.jobClusterMetadata);
}
Also used : IJobClusterDefinition(io.mantisrx.server.master.domain.IJobClusterDefinition) JobClusterConfig(io.mantisrx.server.master.domain.JobClusterConfig) JobDefinition(io.mantisrx.server.master.domain.JobDefinition) TriggerNotFoundException(com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException) SchedulerException(com.netflix.fenzo.triggers.exceptions.SchedulerException) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException)

Example 2 with JobClusterConfig

use of io.mantisrx.server.master.domain.JobClusterConfig in project mantis by Netflix.

the class JobClusterActor method onJobClusterUpdateLabels.

/**
 * Replaces the labels on this job cluster with the ones carried by the request.
 * <p>
 * A new cluster definition is built from the current one (config copied, labels swapped),
 * wrapped in new metadata that keeps the current disabled flag and last job count, and passed to
 * {@code updateAndSaveJobCluster}. The original sender is told SUCCESS and an audit event is
 * published; any exception is logged and answered with SERVER_ERROR instead.
 *
 * @param labelRequest request holding the new labels and the request id echoed in the reply
 */
@Override
public void onJobClusterUpdateLabels(UpdateJobClusterLabelsRequest labelRequest) {
    if (logger.isTraceEnabled()) {
        logger.trace("Enter onJobClusterUpdateLabels {}", labelRequest);
    }
    // Capture the sender up front so the reply goes to the requester.
    final ActorRef replyTo = getSender();
    try {
        final JobClusterConfig configCopy = new JobClusterConfig.Builder()
                .from(jobClusterMetadata.getJobClusterDefinition().getJobClusterConfig())
                .build();
        final JobClusterDefinitionImpl relabeledDefn = new JobClusterDefinitionImpl.Builder()
                .from(jobClusterMetadata.getJobClusterDefinition())
                .withJobClusterConfig(configCopy)
                .withLabels(labelRequest.getLabels())
                .build();
        final IJobClusterMetadata relabeledCluster = new JobClusterMetadataImpl.Builder()
                .withIsDisabled(jobClusterMetadata.isDisabled())
                .withLastJobCount(jobClusterMetadata.getLastJobCount())
                .withJobClusterDefinition(relabeledDefn)
                .build();
        updateAndSaveJobCluster(relabeledCluster);

        final UpdateJobClusterLabelsResponse okReply =
                new UpdateJobClusterLabelsResponse(labelRequest.requestId, SUCCESS, name + " labels updated");
        replyTo.tell(okReply, getSelf());

        // Read the cluster name only after the save call above, as the original code did.
        final LifecycleEventsProto.AuditEvent audit = new LifecycleEventsProto.AuditEvent(
                LifecycleEventsProto.AuditEvent.AuditEventType.JOB_CLUSTER_UPDATE,
                jobClusterMetadata.getJobClusterDefinition().getName(),
                name + " update labels");
        eventPublisher.publishAuditEvent(audit);
    } catch (Exception e) {
        logger.error("job cluster labels not updated ", e);
        final UpdateJobClusterLabelsResponse failureReply = new UpdateJobClusterLabelsResponse(
                labelRequest.requestId, SERVER_ERROR, name + " labels updation failed " + e.getMessage());
        replyTo.tell(failureReply, getSelf());
    }
    if (logger.isTraceEnabled()) {
        logger.trace("Exit onJobClusterUpdateLabels {}", labelRequest);
    }
}
Also used : ActorRef(akka.actor.ActorRef) JobClusterConfig(io.mantisrx.server.master.domain.JobClusterConfig) JobClusterDefinitionImpl(io.mantisrx.server.master.domain.JobClusterDefinitionImpl) LifecycleEventsProto(io.mantisrx.master.events.LifecycleEventsProto) UpdateJobClusterLabelsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterLabelsResponse) TriggerNotFoundException(com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException) SchedulerException(com.netflix.fenzo.triggers.exceptions.SchedulerException) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException)

Example 3 with JobClusterConfig

use of io.mantisrx.server.master.domain.JobClusterConfig in project mantis by Netflix.

the class JobClusterManagerTest method testJobClusterUpdateAndDelete.

/**
 * Creates a job cluster, updates it with a new artifact/version/scheduling config, then deletes
 * it, asserting the response code the manager actor returns at each step.
 */
@Test
public void testJobClusterUpdateAndDelete() throws MalformedURLException {
    final TestKit probe = new TestKit(system);
    final String clusterName = "testJobClusterUpdateAndDeleteCluster";

    // Step 1: create the cluster with a single label.
    final List<Label> initialLabels = Lists.newLinkedList();
    initialLabels.add(new Label("labelname", "labelvalue"));
    final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName, initialLabels);
    jobClusterManagerActor.tell(new JobClusterManagerProto.CreateJobClusterRequest(fakeJobCluster, "user"), probe.getRef());
    final JobClusterManagerProto.CreateJobClusterResponse created =
            probe.expectMsgClass(JobClusterManagerProto.CreateJobClusterResponse.class);
    assertEquals(SUCCESS_CREATED, created.responseCode);

    // Step 2: update the cluster with a different artifact, version and scheduling info.
    final JobClusterConfig newerConfig = new JobClusterConfig.Builder()
            .withArtifactName("myart2")
            .withSchedulingInfo(TWO_WORKER_SCHED_INFO)
            .withVersion("0.0.2")
            .build();
    final JobClusterDefinitionImpl updatedFakeJobCluster = new JobClusterDefinitionImpl.Builder()
            .withJobClusterConfig(newerConfig)
            .withName(clusterName)
            .withParameters(Lists.newArrayList())
            .withUser(user)
            .withIsReadyForJobMaster(true)
            .withOwner(DEFAULT_JOB_OWNER)
            .withMigrationConfig(WorkerMigrationConfig.DEFAULT)
            .withSla(NO_OP_SLA)
            .build();
    jobClusterManagerActor.tell(new JobClusterManagerProto.UpdateJobClusterRequest(updatedFakeJobCluster, "user"), probe.getRef());
    final JobClusterManagerProto.UpdateJobClusterResponse updated =
            probe.expectMsgClass(JobClusterManagerProto.UpdateJobClusterResponse.class);
    // Print the full response before asserting so a failing run shows more than the code.
    if (updated.responseCode != SUCCESS) {
        System.out.println("Update cluster response: " + updated);
    }
    assertEquals(SUCCESS, updated.responseCode);
    // assertEquals(jobClusterManagerActor, probe.getLastSender());

    // Step 3: delete the cluster.
    jobClusterManagerActor.tell(new JobClusterManagerProto.DeleteJobClusterRequest(user, clusterName), probe.getRef());
    final JobClusterManagerProto.DeleteJobClusterResponse deleted =
            probe.expectMsgClass(JobClusterManagerProto.DeleteJobClusterResponse.class);
    assertEquals(SUCCESS, deleted.responseCode);
    // assertEquals(jobClusterManagerActor, probe.getLastSender());
}
Also used : JobClusterConfig(io.mantisrx.server.master.domain.JobClusterConfig) Label(io.mantisrx.common.Label) JobClusterDefinitionImpl(io.mantisrx.server.master.domain.JobClusterDefinitionImpl) TestKit(akka.testkit.javadsl.TestKit) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) Test(org.junit.Test)

Example 4 with JobClusterConfig

use of io.mantisrx.server.master.domain.JobClusterConfig in project mantis by Netflix.

the class JobDefinitionResolverTest method lookupJobClusterConfigTest.

/**
 * Verifies {@code JobDefinitionResolver.getJobClusterConfigForVersion} against a cluster
 * definition holding two configs: each known version must return its own artifact name, version
 * and scheduling info, and an unknown version must return an empty {@code Optional}.
 * <p>
 * The lookup for the unknown version is deliberately not wrapped in try/catch: an unexpected
 * exception propagates to the test runner, which reports it with its full stack trace.
 */
@Test
public void lookupJobClusterConfigTest() {
    String clusterName = "lookupJobClusterConfigTest";
    JobClusterConfig clusterConfig1 = new JobClusterConfig.Builder().withArtifactName(DEFAULT_ARTIFACT_NAME).withSchedulingInfo(SINGLE_WORKER_SCHED_INFO).withVersion(DEFAULT_VERSION).build();
    JobClusterConfig clusterConfig2 = new JobClusterConfig.Builder().withArtifactName("artifact2").withSchedulingInfo(TWO_WORKER_SCHED_INFO).withVersion("0.0.2").build();
    List<JobClusterConfig> configList = new ArrayList<>();
    configList.add(clusterConfig1);
    configList.add(clusterConfig2);
    JobClusterDefinitionImpl jobClusterDefinition = new JobClusterDefinitionImpl.Builder().withJobClusterConfigs(configList).withName(clusterName).withParameters(Lists.newArrayList()).withLabels(Lists.newArrayList()).withUser("user").withIsReadyForJobMaster(true).withOwner(DEFAULT_JOB_OWNER).withMigrationConfig(WorkerMigrationConfig.DEFAULT).withSla(NO_OP_SLA).build();
    IJobClusterMetadata jobClusterMetadata = new JobClusterMetadataImpl.Builder().withJobClusterDefinition(jobClusterDefinition).withLastJobCount(1).withIsDisabled(false).build();
    JobDefinitionResolver resolver = new JobDefinitionResolver();

    // First registered version resolves to the first config.
    Optional<JobClusterConfig> config = resolver.getJobClusterConfigForVersion(jobClusterMetadata, DEFAULT_VERSION);
    assertTrue(config.isPresent());
    assertEquals(DEFAULT_ARTIFACT_NAME, config.get().getArtifactName());
    assertEquals(DEFAULT_VERSION, config.get().getVersion());
    assertEquals(SINGLE_WORKER_SCHED_INFO, config.get().getSchedulingInfo());

    // Second registered version resolves to the second config.
    Optional<JobClusterConfig> config2 = resolver.getJobClusterConfigForVersion(jobClusterMetadata, "0.0.2");
    assertTrue(config2.isPresent());
    assertEquals("artifact2", config2.get().getArtifactName());
    assertEquals("0.0.2", config2.get().getVersion());
    assertEquals(TWO_WORKER_SCHED_INFO, config2.get().getSchedulingInfo());

    // A version that was never registered must yield an empty Optional rather than throw.
    Optional<JobClusterConfig> config3 = resolver.getJobClusterConfigForVersion(jobClusterMetadata, "0.0.3");
    assertTrue("no config expected for unknown version 0.0.3", !config3.isPresent());
}
Also used : JobClusterConfig(io.mantisrx.server.master.domain.JobClusterConfig) ArrayList(java.util.ArrayList) JobClusterDefinitionImpl(io.mantisrx.server.master.domain.JobClusterDefinitionImpl) InvalidJobException(io.mantisrx.runtime.command.InvalidJobException) Test(org.junit.Test)

Example 5 with JobClusterConfig

use of io.mantisrx.server.master.domain.JobClusterConfig in project mantis by Netflix.

the class JobClusterActor method onJobClusterUpdateArtifact.

/**
 * Points this job cluster at a new artifact and version.
 * <p>
 * The request is rejected with CLIENT_ERROR when {@code isVersionUnique} reports the requested
 * version as not unique among the cluster's existing configs; in that case the method returns
 * immediately. Otherwise a new config is derived from the current one (artifact name, version and
 * upload timestamp replaced), saved via {@code updateAndSaveJobCluster}, acknowledged with
 * SUCCESS and audited. Unless the request asks to skip it, a {@code SubmitJobRequest} with no job
 * definition is then sent to this actor itself. Any exception is logged and answered with
 * SERVER_ERROR.
 *
 * @param artifactReq request carrying the artifact name, version, user and skip-submit flag
 */
@Override
public void onJobClusterUpdateArtifact(UpdateJobClusterArtifactRequest artifactReq) {
    if (logger.isTraceEnabled()) {
        logger.trace("Entering JobClusterActor:onJobClusterUpdateArtifact");
    }
    // Capture the sender up front so every reply goes to the requester.
    final ActorRef replyTo = getSender();
    try {
        final boolean versionIsNew = isVersionUnique(
                artifactReq.getVersion(),
                jobClusterMetadata.getJobClusterDefinition().getJobClusterConfigs());
        if (!versionIsNew) {
            final String rejection = String.format("job cluster %s not updated as the version %s is not unique", name, artifactReq.getVersion());
            logger.error(rejection);
            replyTo.tell(new UpdateJobClusterArtifactResponse(artifactReq.requestId, CLIENT_ERROR, rejection), getSelf());
            // Early exit: note that the exit trace at the bottom is not emitted on this path.
            return;
        }

        final JobClusterConfig artifactConfig = new JobClusterConfig.Builder()
                .from(jobClusterMetadata.getJobClusterDefinition().getJobClusterConfig())
                .withArtifactName(artifactReq.getArtifactName())
                .withVersion(artifactReq.getVersion())
                .withUploadedAt(System.currentTimeMillis())
                .build();
        final JobClusterDefinitionImpl revisedDefn = new JobClusterDefinitionImpl.Builder()
                .from(jobClusterMetadata.getJobClusterDefinition())
                .withJobClusterConfig(artifactConfig)
                .build();
        final IJobClusterMetadata revisedCluster = new JobClusterMetadataImpl.Builder()
                .withIsDisabled(jobClusterMetadata.isDisabled())
                .withLastJobCount(jobClusterMetadata.getLastJobCount())
                .withJobClusterDefinition(revisedDefn)
                .build();
        updateAndSaveJobCluster(revisedCluster);

        final UpdateJobClusterArtifactResponse okReply =
                new UpdateJobClusterArtifactResponse(artifactReq.requestId, SUCCESS, name + " artifact updated");
        replyTo.tell(okReply, getSelf());

        // Read the cluster name only after the save call above, as the original code did.
        final LifecycleEventsProto.AuditEvent audit = new LifecycleEventsProto.AuditEvent(
                LifecycleEventsProto.AuditEvent.AuditEventType.JOB_CLUSTER_UPDATE,
                jobClusterMetadata.getJobClusterDefinition().getName(),
                name + " artifact update");
        eventPublisher.publishAuditEvent(audit);

        final boolean submitAfterUpdate = !artifactReq.isSkipSubmit();
        if (submitAfterUpdate) {
            // Self-addressed submit with no job definition attached.
            getSelf().tell(new SubmitJobRequest(name, artifactReq.getUser(), (empty())), getSelf());
        }
    } catch (Exception e) {
        logger.error("job cluster not updated ", e);
        final UpdateJobClusterArtifactResponse failureReply = new UpdateJobClusterArtifactResponse(
                artifactReq.requestId, SERVER_ERROR, name + " Job cluster artifact updation failed " + e.getMessage());
        replyTo.tell(failureReply, getSelf());
    }
    if (logger.isTraceEnabled()) {
        logger.trace("Exit JobClusterActor:onJobClusterUpdateArtifact");
    }
}
Also used : ActorRef(akka.actor.ActorRef) JobClusterConfig(io.mantisrx.server.master.domain.JobClusterConfig) JobClusterDefinitionImpl(io.mantisrx.server.master.domain.JobClusterDefinitionImpl) SubmitJobRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobRequest) UpdateJobClusterArtifactResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterArtifactResponse) TriggerNotFoundException(com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException) SchedulerException(com.netflix.fenzo.triggers.exceptions.SchedulerException) JobClusterAlreadyExistsException(io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException) LifecycleEventsProto(io.mantisrx.master.events.LifecycleEventsProto)

Aggregations

JobClusterConfig (io.mantisrx.server.master.domain.JobClusterConfig)6 JobClusterDefinitionImpl (io.mantisrx.server.master.domain.JobClusterDefinitionImpl)4 SchedulerException (com.netflix.fenzo.triggers.exceptions.SchedulerException)3 TriggerNotFoundException (com.netflix.fenzo.triggers.exceptions.TriggerNotFoundException)3 JobClusterAlreadyExistsException (io.mantisrx.server.master.persistence.exceptions.JobClusterAlreadyExistsException)3 ActorRef (akka.actor.ActorRef)2 Label (io.mantisrx.common.Label)2 LifecycleEventsProto (io.mantisrx.master.events.LifecycleEventsProto)2 JobDefinition (io.mantisrx.server.master.domain.JobDefinition)2 Test (org.junit.Test)2 TestKit (akka.testkit.javadsl.TestKit)1 JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)1 SubmitJobRequest (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.SubmitJobRequest)1 UpdateJobClusterArtifactResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterArtifactResponse)1 UpdateJobClusterLabelsResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.UpdateJobClusterLabelsResponse)1 InvalidJobException (io.mantisrx.runtime.command.InvalidJobException)1 SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo)1 Parameter (io.mantisrx.runtime.parameter.Parameter)1 IJobClusterDefinition (io.mantisrx.server.master.domain.IJobClusterDefinition)1 ArrayList (java.util.ArrayList)1