Use of io.mantisrx.runtime.MachineDefinition in the Netflix project mantis.
Class JobTestLifecycle, method testJobSubmitPerpetual.
/**
 * Submits a one-stage, single-worker job with a {@code Perpetual} duration type to a
 * {@code JobActor} backed by a mocked job store and scheduler, then checks:
 * <ul>
 *   <li>the job reports {@code Accepted} right after initialization,</li>
 *   <li>the job reports {@code Launched} after the launched, start-initiated and heartbeat
 *       events have been sent for its worker,</li>
 *   <li>the number of store calls, and the exact {@code ScheduleRequest} passed to the
 *       scheduler.</li>
 * </ul>
 */
@Test
public void testJobSubmitPerpetual() {
    final TestKit probe = new TestKit(system);
    String clusterName = "testJobSubmitPerpetual";
    IJobClusterDefinition jobClusterDefn = JobTestHelper.generateJobClusterDefinition(clusterName);
    try {
        MachineDefinition machineDefinition = new MachineDefinition(1.0, 1.0, 1.0, 1.0, 3);
        SchedulingInfo schedInfo = new SchedulingInfo.Builder()
                .numberOfStages(1)
                .singleWorkerStageWithConstraints(machineDefinition, Lists.newArrayList(), Lists.newArrayList())
                .build();
        JobDefinition jobDefn = new JobDefinition.Builder()
                .withName(clusterName)
                .withParameters(Lists.newArrayList())
                .withLabels(Lists.newArrayList())
                .withSchedulingInfo(schedInfo)
                .withArtifactName("myart")
                .withSubscriptionTimeoutSecs(30)
                .withUser("njoshi")
                .withNumberOfStages(schedInfo.getStages().size())
                .withJobSla(new JobSla(0, 0, null, MantisJobDurationType.Perpetual, null))
                .build();
        MantisScheduler schedulerMock = mock(MantisScheduler.class);
        MantisJobStore jobStoreMock = mock(MantisJobStore.class);
        MantisJobMetadataImpl mantisJobMetaData = new MantisJobMetadataImpl.Builder()
                .withJobId(new JobId(clusterName, 1))
                .withSubmittedAt(Instant.now())
                .withJobState(JobState.Accepted)
                .withNextWorkerNumToUse(1)
                .withJobDefinition(jobDefn)
                .build();
        final ActorRef jobActor = system.actorOf(
                JobActor.props(jobClusterDefn, mantisJobMetaData, jobStoreMock, schedulerMock, eventPublisher));

        // Initialize the job actor and wait for its acknowledgement.
        jobActor.tell(new JobProto.InitJob(probe.getRef()), probe.getRef());
        JobProto.JobInitialized initMsg = probe.expectMsgClass(JobProto.JobInitialized.class);
        assertEquals(SUCCESS, initMsg.responseCode);

        // Before any worker event arrives the job is still Accepted and stage 1 exists.
        String jobId = clusterName + "-1";
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp + " msg " + resp.message);
        assertEquals(SUCCESS, resp.responseCode);
        assertEquals(JobState.Accepted, resp.getJobMetadata().get().getState());
        assertTrue(resp.getJobMetadata().get().getStageMetadata(1).isPresent());

        // Send launched, start-initiated and heartbeat events for the worker.
        WorkerId workerId = new WorkerId(jobId, 0, 1);
        int stageNum = 1;
        JobTestHelper.sendWorkerLaunchedEvent(probe, jobActor, workerId, stageNum);
        JobTestHelper.sendStartInitiatedEvent(probe, jobActor, stageNum, workerId);
        JobTestHelper.sendHeartBeat(probe, jobActor, jobId, stageNum, workerId);

        // The job should now report Launched.
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp2 + " msg " + resp2.message);
        assertEquals(SUCCESS, resp2.responseCode);
        assertEquals(JobState.Launched, resp2.getJobMetadata().get().getState());

        // Persistence and scheduling interactions recorded by the mocks.
        verify(jobStoreMock, times(1)).storeNewJob(any());
        verify(jobStoreMock, times(1)).storeNewWorkers(any(), any());
        verify(jobStoreMock, times(3)).updateWorker(any());
        verify(jobStoreMock, times(3)).updateJob(any());
        verify(schedulerMock, times(1)).scheduleWorker(any());

        // The single schedule request must match this expected request exactly.
        JobMetadata jobMetadata = new JobMetadata(
                jobId, new URL("http://myart"), 1, "njoshi", schedInfo, Lists.newArrayList(), 0, 0);
        ScheduleRequest expectedScheduleRequest = new ScheduleRequest(
                workerId, 1, 4, jobMetadata, MantisJobDurationType.Perpetual, machineDefinition,
                Lists.newArrayList(), Lists.newArrayList(), 0, empty());
        verify(schedulerMock).scheduleWorker(expectedScheduleRequest);
    } catch (Exception e) {
        // Any exception (InvalidJobException included) fails the test; attaching it as the
        // cause keeps its message and stack trace in the test report.
        throw new AssertionError("testJobSubmitPerpetual failed with an unexpected exception", e);
    }
}
Use of io.mantisrx.runtime.MachineDefinition in the Netflix project mantis.
Class JobTestLifecycle, method testJobSubmitWithMultipleWorkers.
/**
 * Submits a one-stage job configured with two workers and checks that the job stays
 * {@code Accepted} after only the first worker has been driven through its start-up
 * events, and becomes {@code Launched} once the second worker has been driven through
 * them as well. Finally verifies the number of calls made on the mocked job store.
 */
@Test
public void testJobSubmitWithMultipleWorkers() {
    final TestKit probe = new TestKit(system);
    String clusterName = "testJobSubmitWithMultipleWorkersCluster";
    IJobClusterDefinition jobClusterDefn = JobTestHelper.generateJobClusterDefinition(clusterName);
    try {
        SchedulingInfo sInfo = new SchedulingInfo.Builder()
                .numberOfStages(1)
                .multiWorkerStageWithConstraints(
                        2, new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList())
                .build();
        JobDefinition jobDefn = JobTestHelper.generateJobDefinition(clusterName, sInfo);
        MantisScheduler schedulerMock = mock(MantisScheduler.class);
        MantisJobStore jobStoreMock = mock(MantisJobStore.class);
        MantisJobMetadataImpl mantisJobMetaData = new MantisJobMetadataImpl.Builder()
                .withJobId(new JobId(clusterName, 2))
                .withSubmittedAt(Instant.now())
                .withJobState(JobState.Accepted)
                .withNextWorkerNumToUse(1)
                .withJobDefinition(jobDefn)
                .build();
        final ActorRef jobActor = system.actorOf(
                JobActor.props(jobClusterDefn, mantisJobMetaData, jobStoreMock, schedulerMock, eventPublisher));

        // Initialize the job actor and wait for its acknowledgement.
        jobActor.tell(new JobProto.InitJob(probe.getRef()), probe.getRef());
        JobProto.JobInitialized initMsg = probe.expectMsgClass(JobProto.JobInitialized.class);
        assertEquals(SUCCESS, initMsg.responseCode);

        // Freshly initialized job is Accepted.
        String jobId = clusterName + "-2";
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp + " msg " + resp.message);
        assertEquals(SUCCESS, resp.responseCode);
        assertEquals(JobState.Accepted, resp.getJobMetadata().get().getState());

        // Drive the first worker through its start-up events.
        int stageNo = 1;
        WorkerId workerId = new WorkerId(jobId, 0, 1);
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId, stageNo, workerId);

        // Only 1 of the 2 workers has started, so the job must still be Accepted.
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp2 + " msg " + resp2.message);
        assertEquals(SUCCESS, resp2.responseCode);
        assertEquals(JobState.Accepted, resp2.getJobMetadata().get().getState());

        // Drive the second worker through its start-up events.
        WorkerId workerId2 = new WorkerId(jobId, 1, 2);
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId, stageNo, workerId2);

        // Both workers have started, so the job should now be Launched.
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp3 = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp3 + " msg " + resp3.message);
        assertEquals(SUCCESS, resp3.responseCode);
        assertEquals(JobState.Launched, resp3.getJobMetadata().get().getState());

        // Persistence interactions recorded by the mocked store.
        verify(jobStoreMock, times(1)).storeNewJob(any());
        verify(jobStoreMock, times(1)).storeNewWorkers(any(), any());
        verify(jobStoreMock, times(6)).updateWorker(any());
        verify(jobStoreMock, times(3)).updateJob(any());
    } catch (Exception e) {
        // Any exception (InvalidJobException included) fails the test; attaching it as the
        // cause keeps its message and stack trace in the test report.
        throw new AssertionError("testJobSubmitWithMultipleWorkers failed with an unexpected exception", e);
    }
}
Use of io.mantisrx.runtime.MachineDefinition in the Netflix project mantis.
Class JobTestLifecycle, method testLostWorkerGetsReplaced.
//
/**
 * Starts a one-stage job with two workers, sends a terminated event for the second worker,
 * then drives a replacement worker through its start-up events.
 *
 * <p>Verifies that the mocked scheduler received three {@code scheduleWorker} calls
 * (2 initial schedules and 1 replacement) and that {@code archiveWorker} was invoked once
 * on the spied job store. The job actor is stopped when the test ends.
 */
@Test
public void testLostWorkerGetsReplaced() {
    final TestKit probe = new TestKit(system);
    String clusterName = "testLostWorkerGetsReplaced";
    IJobClusterDefinition jobClusterDefn = JobTestHelper.generateJobClusterDefinition(clusterName);
    // Declared outside the try so the finally block can stop the actor.
    ActorRef jobActor = null;
    try {
        SchedulingInfo sInfo = new SchedulingInfo.Builder()
                .numberOfStages(1)
                .multiWorkerStageWithConstraints(
                        2, new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList())
                .build();
        JobDefinition jobDefn = JobTestHelper.generateJobDefinition(clusterName, sInfo);
        MantisScheduler schedulerMock = mock(MantisScheduler.class);
        // A spy over the shared jobStore is used here instead of a plain mock.
        MantisJobStore jobStoreSpied = Mockito.spy(jobStore);
        MantisJobMetadataImpl mantisJobMetaData = new MantisJobMetadataImpl.Builder()
                .withJobId(new JobId(clusterName, 2))
                .withSubmittedAt(Instant.now())
                .withJobState(JobState.Accepted)
                .withNextWorkerNumToUse(1)
                .withJobDefinition(jobDefn)
                .build();
        jobActor = system.actorOf(
                JobActor.props(jobClusterDefn, mantisJobMetaData, jobStoreSpied, schedulerMock, eventPublisher));

        // Initialize the job actor and wait for its acknowledgement.
        jobActor.tell(new JobProto.InitJob(probe.getRef()), probe.getRef());
        JobProto.JobInitialized initMsg = probe.expectMsgClass(JobProto.JobInitialized.class);
        assertEquals(SUCCESS, initMsg.responseCode);

        // Freshly initialized job is Accepted.
        String jobId = clusterName + "-2";
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp + " msg " + resp.message);
        assertEquals(SUCCESS, resp.responseCode);
        assertEquals(JobState.Accepted, resp.getJobMetadata().get().getState());

        // Drive the first worker through its start-up events.
        int stageNo = 1;
        WorkerId workerId = new WorkerId(jobId, 0, 1);
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId, stageNo, workerId);

        // Only 1 of the 2 workers has started, so the job must still be Accepted.
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp2 + " msg " + resp2.message);
        assertEquals(SUCCESS, resp2.responseCode);
        assertEquals(JobState.Accepted, resp2.getJobMetadata().get().getState());

        // Drive the second worker through its start-up events.
        WorkerId workerId2 = new WorkerId(jobId, 1, 2);
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId, stageNo, workerId2);

        // Both workers have started, so the job should now be Launched.
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp3 = probe.expectMsgClass(GetJobDetailsResponse.class);
        System.out.println("resp " + resp3 + " msg " + resp3.message);
        assertEquals(SUCCESS, resp3.responseCode);
        assertEquals(JobState.Launched, resp3.getJobMetadata().get().getState());

        // Worker 2 gets terminated abnormally.
        JobTestHelper.sendWorkerTerminatedEvent(probe, jobActor, jobId, workerId2);

        // The replacement worker comes up and sends its events.
        WorkerId workerId2_replaced = new WorkerId(jobId, 1, 3);
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId, stageNo, workerId2_replaced);

        // Dump the stage-1 worker metadata for inspection (diagnostic output only).
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        GetJobDetailsResponse resp4 = probe.expectMsgClass(GetJobDetailsResponse.class);
        IMantisJobMetadata jobMeta = resp4.getJobMetadata().get();
        Map<Integer, ? extends IMantisStageMetadata> stageMetadata = jobMeta.getStageMetadata();
        IMantisStageMetadata stage = stageMetadata.get(1);
        for (JobWorker worker : stage.getAllWorkers()) {
            System.out.println("worker -> " + worker.getMetadata());
        }

        // 2 initial schedules and 1 replacement.
        verify(schedulerMock, timeout(1_000).times(3)).scheduleWorker(any());
        // archiveWorker should get called once, for the dead worker.
        Mockito.verify(jobStoreSpied).archiveWorker(any());
    } catch (Exception e) {
        // Any exception (InvalidJobException included) fails the test; attaching it as the
        // cause keeps its message and stack trace in the test report.
        throw new AssertionError("testLostWorkerGetsReplaced failed with an unexpected exception", e);
    } finally {
        // jobActor is still null when setup failed before the actor was created; stopping a
        // null reference would raise a secondary exception that hides the real failure.
        if (jobActor != null) {
            system.stop(jobActor);
        }
    }
}
Use of io.mantisrx.runtime.MachineDefinition in the Netflix project mantis.
Class JobTestMigrationTests, method testWorkerMigration.
/**
 * Starts a single-worker job on a cluster configured with the {@code ONE_WORKER} migration
 * strategy, reports the worker as being on a disabled VM, triggers a heartbeat check and
 * then drives the migrated worker through its start-up events.
 *
 * <p>The test passes only if both latches handed to the {@code DummyScheduler} reach zero
 * within one second each: {@code scheduleCDL} (count 2: 1 original submission and 1 resubmit
 * because of migration) and {@code unscheduleCDL} (count 1: 1 kill due to the resubmit).
 */
@Test
public void testWorkerMigration() {
    String clusterName = "testWorkerMigration";
    TestKit probe = new TestKit(system);
    SchedulingInfo sInfo = new SchedulingInfo.Builder()
            .numberOfStages(1)
            .singleWorkerStageWithConstraints(
                    new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList())
            .build();
    IJobClusterDefinition jobClusterDefn = JobTestHelper.generateJobClusterDefinition(
            clusterName, sInfo, new WorkerMigrationConfig(MigrationStrategyEnum.ONE_WORKER, "{}"));
    CountDownLatch scheduleCDL = new CountDownLatch(2);
    CountDownLatch unscheduleCDL = new CountDownLatch(1);
    try {
        JobDefinition jobDefn = JobTestHelper.generateJobDefinition(clusterName, sInfo);
        // NOTE(review): DummyScheduler presumably counts these latches down on
        // scheduleWorker / unscheduleWorker respectively; confirm against its definition.
        MantisScheduler schedulerMock = new DummyScheduler(scheduleCDL, unscheduleCDL);
        MantisJobStore jobStoreMock = mock(MantisJobStore.class);
        MantisJobMetadataImpl mantisJobMetaData = new MantisJobMetadataImpl.Builder()
                .withJobId(new JobId(clusterName, 2))
                .withSubmittedAt(Instant.now())
                .withJobState(JobState.Accepted)
                .withNextWorkerNumToUse(1)
                .withJobDefinition(jobDefn)
                .build();
        final ActorRef jobActor = system.actorOf(
                JobActor.props(jobClusterDefn, mantisJobMetaData, jobStoreMock, schedulerMock, eventPublisher));

        // Initialize the job actor and wait for its acknowledgement.
        jobActor.tell(new JobProto.InitJob(probe.getRef()), probe.getRef());
        JobProto.JobInitialized initMsg = probe.expectMsgClass(JobProto.JobInitialized.class);
        assertEquals(SUCCESS, initMsg.responseCode);

        // Send Launched, Initiated and heartbeat for the original worker.
        String jobId = clusterName + "-2";
        int stageNo = 1;
        WorkerId workerId = new WorkerId(jobId, 0, 1);
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId, stageNo, workerId);

        // The worker has started, so the job should be Launched.
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        JobClusterManagerProto.GetJobDetailsResponse resp3 =
                probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
        assertEquals(SUCCESS, resp3.responseCode);
        assertEquals(JobState.Launched, resp3.getJobMetadata().get().getState());

        // Send the migrate-worker message.
        jobActor.tell(new WorkerOnDisabledVM(workerId), probe.getRef());
        // Trigger a heartbeat check; that should start the migration of the worker.
        jobActor.tell(new JobProto.CheckHeartBeat(), probe.getRef());

        // Send start-up events and heartbeat for the migrated worker.
        WorkerId migratedWorkerId1 = new WorkerId(jobId, 0, 2);
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId, stageNo, migratedWorkerId1);

        // await(timeout) returns false on timeout instead of throwing, so its result has to
        // be checked; otherwise the test passes even when no migration took place.
        if (!scheduleCDL.await(1, TimeUnit.SECONDS)) {
            fail("scheduleCDL did not reach zero within 1s; remaining count: " + scheduleCDL.getCount());
        }
        if (!unscheduleCDL.await(1, TimeUnit.SECONDS)) {
            fail("unscheduleCDL did not reach zero within 1s; remaining count: " + unscheduleCDL.getCount());
        }
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Restore the interrupt flag consumed by the latch wait.
            Thread.currentThread().interrupt();
        }
        // Any exception (InvalidJobException included) fails the test; attaching it as the
        // cause keeps its message and stack trace in the test report.
        throw new AssertionError("testWorkerMigration failed with an unexpected exception", e);
    }
}
Aggregations