use of io.mantisrx.server.master.domain.JobDefinition in project mantis by Netflix.
the class JobDefinitionResolver method getResolvedJobDefinition.
/**
 * Resolves the artifact name, version and scheduling info of a submitted JobDefinition against
 * the configuration held by its job cluster. A field counts as "given" (Y) when the
 * {@code isNull} / {@code schedulingInfoNotValid} helpers report it as usable.
 *
 * <pre>
 * Artifact | Version | SchedulingInfo | Resolution
 * ---------+---------+----------------+------------------------------------------------------------
 *    Y     |    Y    |       Y        | Use the given values as they are
 *    Y     |    Y    |       N        | INVALID (new artifact with no sched info)
 *    Y     |    N    |       Y        | Generate a version and use the given sched info
 *    Y     |    N    |       N        | INVALID (new artifact with no sched info)
 *    N     |    Y    |       Y        | Look up the cluster config for the given version, ensure the
 *          |         |                | given sched info is compatible, take the artifact from the config
 *    N     |    Y    |       N        | Look up the cluster config for the given version, take sched
 *          |         |                | info and artifact from it
 *    N     |    N    |       Y        | Get the latest cluster config, ensure the given sched info is
 *          |         |                | compatible, take version and artifact from the config
 *    N     |    N    |       N        | Get the latest cluster config, take version, sched info and
 *          |         |                | artifact from it
 * </pre>
 *
 * Parameters and labels are taken from the cluster definition when the submitted definition has
 * none (null or empty).
 *
 * @param user               user set on the resolved definition
 * @param givenJobDefnOp     the submitted job definition; checked with Preconditions.checkNotNull
 * @param jobClusterMetadata metadata of the target job cluster; checked with Preconditions.checkNotNull
 * @return a new JobDefinition built from the given one with the resolved parameters, labels,
 *         scheduling info, user, version and artifact name
 * @throws Exception if the combination is invalid, no cluster config exists for the given version,
 *                   the given scheduling info is incompatible, or any of the three fields is still
 *                   unresolved at the end
 */
JobDefinition getResolvedJobDefinition(final String user, final JobDefinition givenJobDefnOp, final IJobClusterMetadata jobClusterMetadata) throws Exception {
    Preconditions.checkNotNull(givenJobDefnOp, "JobDefinition cannot be null");
    Preconditions.checkNotNull(jobClusterMetadata, "JobClusterMetadata cannot be null");
    logger.info("Given JobDefn {}", givenJobDefnOp);

    // Fall back to the cluster's parameters when the submission carries none.
    List<Parameter> parameters = givenJobDefnOp.getParameters();
    if (parameters == null || parameters.isEmpty()) {
        parameters = jobClusterMetadata.getJobClusterDefinition().getParameters();
    }

    // Same fallback for labels.
    List<Label> labels = givenJobDefnOp.getLabels();
    if (labels == null || labels.isEmpty()) {
        labels = jobClusterMetadata.getJobClusterDefinition().getLabels();
    }

    String artifactName = givenJobDefnOp.getArtifactName();
    SchedulingInfo schedulingInfo = givenJobDefnOp.getSchedulingInfo();
    String version = givenJobDefnOp.getVersion();

    // Classify the submission once; the branches below follow the table in the Javadoc.
    final boolean artifactGiven = !isNull(artifactName);
    final boolean versionGiven = !isNull(version);
    final boolean schedInfoGiven = !schedulingInfoNotValid(schedulingInfo);

    if (artifactGiven && versionGiven && schedInfoGiven) {
        // Y | Y | Y : everything was supplied, nothing to resolve.
        // update cluster ?
    } else if (artifactGiven && versionGiven && !schedInfoGiven) {
        // Y | Y | N : a new artifact must come with scheduling info.
        String msg = String.format("Scheduling info is not specified during Job Submit for cluster %s while new artifact is specified %s. Job Submit fails", jobClusterMetadata.getJobClusterDefinition().getName(), artifactName);
        logger.warn(msg);
        throw new Exception(msg);
    } else if (artifactGiven && !versionGiven && schedInfoGiven) {
        // Y | N | Y : derive a version from the current time.
        version = String.valueOf(System.currentTimeMillis());
        // update cluster ?
    } else if (artifactGiven && !versionGiven && !schedInfoGiven) {
        // Y | N | N : a new artifact must come with scheduling info.
        String msg = String.format("Scheduling info is not specified during Job Submit for cluster %s while new artifact %s is specified. Job Submit fails", jobClusterMetadata.getJobClusterDefinition().getName(), artifactName);
        logger.warn(msg);
        throw new Exception(msg);
    } else if (!artifactGiven && versionGiven && schedInfoGiven) {
        // N | Y | Y : the given scheduling info must be compatible with the stored config for that version.
        final JobClusterConfig configForVersion = requireJobClusterConfigForVersion(jobClusterMetadata, version);
        final boolean compatible = validateSchedulingInfo(schedulingInfo, configForVersion.getSchedulingInfo(), jobClusterMetadata);
        if (!compatible) {
            String msg = String.format("Given SchedulingInfo %s is incompatible with that associated with the given version %s in JobCluster %s. Job Submit fails", schedulingInfo, version, jobClusterMetadata.getJobClusterDefinition().getName());
            logger.warn(msg);
            throw new Exception(msg);
        }
        artifactName = configForVersion.getArtifactName();
    } else if (!artifactGiven && versionGiven && !schedInfoGiven) {
        // N | Y | N : take scheduling info and artifact from the stored config for that version.
        final JobClusterConfig configForVersion = requireJobClusterConfigForVersion(jobClusterMetadata, version);
        schedulingInfo = configForVersion.getSchedulingInfo();
        artifactName = configForVersion.getArtifactName();
    } else if (!artifactGiven && !versionGiven && schedInfoGiven) {
        // N | N | Y : use the latest cluster config, but keep the (validated) given scheduling info.
        final JobClusterConfig latestConfig = jobClusterMetadata.getJobClusterDefinition().getJobClusterConfig();
        version = latestConfig.getVersion();
        artifactName = latestConfig.getArtifactName();
        final boolean compatible = validateSchedulingInfo(schedulingInfo, latestConfig.getSchedulingInfo(), jobClusterMetadata);
        if (!compatible) {
            String msg = String.format("Given SchedulingInfo %s is incompatible with that associated with the given version %s in JobCluster %s which is %s. Job Submit fails", schedulingInfo, version, jobClusterMetadata.getJobClusterDefinition().getName(), jobClusterMetadata.getJobClusterDefinition().getJobClusterConfig().getSchedulingInfo());
            logger.warn(msg);
            throw new Exception(msg);
        }
    } else if (!artifactGiven && !versionGiven && !schedInfoGiven) {
        // N | N | N : everything comes from the latest cluster config.
        final JobClusterConfig latestConfig = jobClusterMetadata.getJobClusterDefinition().getJobClusterConfig();
        version = latestConfig.getVersion();
        schedulingInfo = latestConfig.getSchedulingInfo();
        artifactName = latestConfig.getArtifactName();
    } else {
        // Defensive: the eight cases above are exhaustive, so this should never run.
        throw new Exception(String.format("Invalid case for resolveJobDefinition artifactName %s version %s schedulingInfo %s", artifactName, version, schedulingInfo));
    }

    logger.info("Resolved version {}, schedulingInfo {}, artifactName {}", version, schedulingInfo, artifactName);

    // The values picked up from a cluster config may themselves be unusable, so re-check all three.
    final boolean fullyResolved = !isNull(artifactName) && !isNull(version) && !schedulingInfoNotValid(schedulingInfo);
    if (!fullyResolved) {
        String msg = String.format(" SchedulingInfo %s or artifact %s or version %s could not be resolved in JobCluster %s. Job Submit fails", schedulingInfo, artifactName, version, jobClusterMetadata.getJobClusterDefinition().getName());
        logger.warn(msg);
        throw new Exception(msg);
    }

    return new JobDefinition.Builder()
            .from(givenJobDefnOp)
            .withParameters(parameters)
            .withLabels(labels)
            .withSchedulingInfo(schedulingInfo)
            .withUser(user)
            .withVersion(version)
            .withArtifactName(artifactName)
            .build();
}

/**
 * Returns the job cluster config stored for the given version, or logs a warning and throws
 * when the cluster has no config for that version.
 */
private JobClusterConfig requireJobClusterConfigForVersion(final IJobClusterMetadata jobClusterMetadata, final String version) throws Exception {
    final Optional<JobClusterConfig> found = getJobClusterConfigForVersion(jobClusterMetadata, version);
    if (found.isPresent()) {
        return found.get();
    }
    String msg = String.format("No Job Cluster config could be found for version %s in JobCluster %s. Job Submit fails", version, jobClusterMetadata.getJobClusterDefinition().getName());
    logger.warn(msg);
    throw new Exception(msg);
}
use of io.mantisrx.server.master.domain.JobDefinition in project mantis by Netflix.
the class LabelManager method insertSystemLabels.
/**
 * Returns a job definition whose labels end with the system artifact and version labels,
 * set from the definition's own artifact name and version. Any existing labels with those two
 * system names are dropped first so the values are never duplicated.
 *
 * @param resolvedJobDefn job definition to decorate
 * @param autoResubmit    when true, the definition is first passed through insertAutoResubmitLabel
 * @return the rebuilt definition; if rebuilding fails with InvalidJobException the error is
 *         logged and {@code resolvedJobDefn} is returned exactly as it was passed in
 */
static JobDefinition insertSystemLabels(JobDefinition resolvedJobDefn, boolean autoResubmit) {
    JobDefinition updatedJobDefn = resolvedJobDefn;
    if (autoResubmit) {
        updatedJobDefn = insertAutoResubmitLabel(resolvedJobDefn);
    }
    String artifactName = updatedJobDefn.getArtifactName();
    String version = updatedJobDefn.getVersion();
    List<Label> labels = updatedJobDefn.getLabels();

    // Remove old artifact & version labels if present. Collect into an explicit ArrayList:
    // Collectors.toList() gives no mutability guarantee, and we append to the result below.
    List<Label> updatedLabels = labels.stream()
            .filter(label -> !label.getName().equals(SystemLabels.MANTIS_ARTIFACT_LABEL.label)
                    && !label.getName().equals(SystemLabels.MANTIS_VERSION_LABEL.label))
            .collect(Collectors.toCollection(java.util.ArrayList::new));
    updatedLabels.add(new Label(SystemLabels.MANTIS_ARTIFACT_LABEL.label, artifactName));
    updatedLabels.add(new Label(SystemLabels.MANTIS_VERSION_LABEL.label, version));

    try {
        return new JobDefinition.Builder().from(updatedJobDefn).withLabels(updatedLabels).build();
    } catch (InvalidJobException e) {
        // Best effort: keep the submission usable, but log the full cause rather than just its message.
        logger.error("Unable to insert system labels into job definition; returning the given definition", e);
        return resolvedJobDefn;
    }
}
use of io.mantisrx.server.master.domain.JobDefinition in project mantis by Netflix.
the class JobTestLifecycle method testListActiveWorkers.
/**
 * Starts a job actor for a single-stage job built with two workers, walks both workers through
 * their launched/start-initiated/started events, and checks that the job only moves to Launched
 * once both have started and that a ListWorkersRequest returns both workers.
 */
@Test
public void testListActiveWorkers() {
    final TestKit probe = new TestKit(system);
    String clusterName = "testListActiveWorkers";
    IJobClusterDefinition jobClusterDefn = JobTestHelper.generateJobClusterDefinition(clusterName);
    JobDefinition jobDefn;
    try {
        SchedulingInfo sInfo = new SchedulingInfo.Builder().numberOfStages(1).multiWorkerStageWithConstraints(2, new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList()).build();
        jobDefn = JobTestHelper.generateJobDefinition(clusterName, sInfo);
        MantisScheduler schedulerMock = mock(MantisScheduler.class);
        MantisJobStore jobStoreMock = mock(MantisJobStore.class);
        // One JobId for the whole test, so every request addresses the job the actor was created for.
        final JobId jobIdUnderTest = new JobId(clusterName, 2);
        MantisJobMetadataImpl mantisJobMetaData = new MantisJobMetadataImpl.Builder().withJobId(jobIdUnderTest).withSubmittedAt(Instant.now()).withJobState(JobState.Accepted).withNextWorkerNumToUse(1).withJobDefinition(jobDefn).build();
        final ActorRef jobActor = system.actorOf(JobActor.props(jobClusterDefn, mantisJobMetaData, jobStoreMock, schedulerMock, eventPublisher));

        // initialize the job actor
        jobActor.tell(new JobProto.InitJob(probe.getRef()), probe.getRef());
        JobProto.JobInitialized initMsg = probe.expectMsgClass(JobProto.JobInitialized.class);
        assertEquals(SUCCESS, initMsg.responseCode);

        // freshly initialized job is still Accepted
        String jobId = clusterName + "-2";
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp + " msg " + resp.message);
        assertEquals(SUCCESS, resp.responseCode);
        assertEquals(JobState.Accepted, resp.getJobMetadata().get().getState());

        int stageNo = 1;
        // first worker: send launched, start-initiated and started events
        WorkerId workerId = new WorkerId(jobId, 0, 1);
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId, stageNo, workerId);

        // check job status again
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp2 + " msg " + resp2.message);
        assertEquals(SUCCESS, resp2.responseCode);
        // Only 1 worker has started.
        assertEquals(JobState.Accepted, resp2.getJobMetadata().get().getState());

        // second worker: send launched, start-initiated and started events
        WorkerId workerId2 = new WorkerId(jobId, 1, 2);
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId, stageNo, workerId2);

        // check job status again
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp3 = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp3 + " msg " + resp3.message);
        assertEquals(SUCCESS, resp3.responseCode);
        // 2 workers have started so job should be started.
        assertEquals(JobState.Launched, resp3.getJobMetadata().get().getState());

        // list the workers of the job under test
        jobActor.tell(new JobClusterManagerProto.ListWorkersRequest(jobIdUnderTest), probe.getRef());
        JobClusterManagerProto.ListWorkersResponse listWorkersResponse = probe.expectMsgClass(JobClusterManagerProto.ListWorkersResponse.class);
        assertEquals(2, listWorkersResponse.getWorkerMetadata().size());
        // both returned workers must carry the worker numbers used above (1 and 2)
        int cnt = 0;
        for (IMantisWorkerMetadata workerMeta : listWorkersResponse.getWorkerMetadata()) {
            if (workerMeta.getWorkerNumber() == 1 || workerMeta.getWorkerNumber() == 2) {
                cnt++;
            }
        }
        assertEquals(2, cnt);

        verify(jobStoreMock, times(1)).storeNewJob(any());
        verify(jobStoreMock, times(1)).storeNewWorkers(any(), any());
        verify(jobStoreMock, times(6)).updateWorker(any());
        verify(jobStoreMock, times(3)).updateJob(any());
    } catch (Exception e) {
        // Any exception (including InvalidJobException from building the job) fails the test.
        e.printStackTrace();
        fail();
    }
}
use of io.mantisrx.server.master.domain.JobDefinition in project mantis by Netflix.
the class JobTestLifecycle method testJobSubmitWithoutInit.
/**
 * Creates a job actor but never sends it InitJob, then checks that a GetJobDetailsRequest is
 * answered with CLIENT_ERROR.
 */
@Test
public void testJobSubmitWithoutInit() {
    final TestKit probe = new TestKit(system);
    String clusterName = "testJobSubmitCluster";
    IJobClusterDefinition jobClusterDefn = JobTestHelper.generateJobClusterDefinition(clusterName);
    JobDefinition jobDefn;
    try {
        jobDefn = JobTestHelper.generateJobDefinition(clusterName);
        MantisScheduler schedulerMock = mock(MantisScheduler.class);
        MantisJobStore jobStoreMock = mock(MantisJobStore.class);
        MantisJobMetadataImpl mantisJobMetaData = new MantisJobMetadataImpl.Builder().withJobId(new JobId(clusterName, 1)).withSubmittedAt(Instant.now()).withJobState(JobState.Accepted).withNextWorkerNumToUse(1).withJobDefinition(jobDefn).build();
        final ActorRef jobActor = system.actorOf(JobActor.props(jobClusterDefn, mantisJobMetaData, jobStoreMock, schedulerMock, eventPublisher));
        String jobId = clusterName + "-1";
        // No InitJob was sent, so the actor is expected to reject the request.
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println(resp.message);
        assertEquals(CLIENT_ERROR, resp.responseCode);
    } catch (Exception e) {
        e.printStackTrace();
        // Without this, an exception during setup would skip the assertion and the test would still pass.
        fail();
    }
}
use of io.mantisrx.server.master.domain.JobDefinition in project mantis by Netflix.
the class JobTestLifecycle method testJobSubmit.
/**
 * Initializes a job actor for a generated job definition, sends launched, start-initiated and
 * heartbeat events for its worker, and checks the job goes from Accepted to Launched with the
 * expected number of job-store interactions.
 */
@Test
public void testJobSubmit() {
    final TestKit testProbe = new TestKit(system);
    final String cluster = "testJobSubmitCluster";
    final IJobClusterDefinition clusterDefinition = JobTestHelper.generateJobClusterDefinition(cluster);
    try {
        final JobDefinition jobDefinition = JobTestHelper.generateJobDefinition(cluster);
        final MantisScheduler scheduler = mock(MantisScheduler.class);
        final MantisJobStore jobStore = mock(MantisJobStore.class);
        final MantisJobMetadataImpl jobMetadata = new MantisJobMetadataImpl.Builder()
                .withJobId(new JobId(cluster, 1))
                .withSubmittedAt(Instant.now())
                .withJobState(JobState.Accepted)
                .withNextWorkerNumToUse(1)
                .withJobDefinition(jobDefinition)
                .build();
        final ActorRef actorUnderTest = system.actorOf(
                JobActor.props(clusterDefinition, jobMetadata, jobStore, scheduler, eventPublisher));

        // Bring the actor up.
        actorUnderTest.tell(new JobProto.InitJob(testProbe.getRef()), testProbe.getRef());
        final JobProto.JobInitialized initialized = testProbe.expectMsgClass(JobProto.JobInitialized.class);
        assertEquals(SUCCESS, initialized.responseCode);

        // Right after init the job is Accepted and stage 1 metadata exists.
        final String jobIdText = cluster + "-1";
        actorUnderTest.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobIdText), testProbe.getRef());
        final GetJobDetailsResponse beforeStart = testProbe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + beforeStart + " msg " + beforeStart.message);
        assertEquals(SUCCESS, beforeStart.responseCode);
        assertEquals(JobState.Accepted, beforeStart.getJobMetadata().get().getState());
        assertTrue(beforeStart.getJobMetadata().get().getStageMetadata(1).isPresent());

        // Worker life cycle: launched -> start initiated -> heartbeat.
        final int stage = 1;
        final WorkerId worker = new WorkerId(jobIdText, 0, 1);
        JobTestHelper.sendWorkerLaunchedEvent(testProbe, actorUnderTest, worker, stage);
        JobTestHelper.sendStartInitiatedEvent(testProbe, actorUnderTest, stage, worker);
        JobTestHelper.sendHeartBeat(testProbe, actorUnderTest, jobIdText, stage, worker);

        // The job should now report Launched.
        actorUnderTest.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobIdText), testProbe.getRef());
        final GetJobDetailsResponse afterStart = testProbe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + afterStart + " msg " + afterStart.message);
        assertEquals(SUCCESS, afterStart.responseCode);
        assertEquals(JobState.Launched, afterStart.getJobMetadata().get().getState());

        // Persistence calls made along the way.
        verify(jobStore, times(1)).storeNewJob(any());
        verify(jobStore, times(1)).storeNewWorkers(any(), any());
        verify(jobStore, times(3)).updateWorker(any());
        verify(jobStore, times(3)).updateJob(any());
    } catch (Exception e) {
        // Covers InvalidJobException as well; any exception fails the test.
        e.printStackTrace();
        fail();
    }
}
Aggregations