use of io.mantisrx.server.master.persistence.MantisJobStore in project mantis by Netflix.
the class WorkerRegistryV2Test method testJobScaleUp.
/**
 * Submits a single-stage scalable job, requests that stage 1 be scaled to two workers and
 * checks that the scale request succeeds, that the job metadata lists two workers for stage 1,
 * and that the worker registry eventually reports three running workers.
 *
 * @throws Exception if job submission, messaging or the polling sleep fails
 */
@Test
public void testJobScaleUp() throws Exception, InvalidJobException, io.mantisrx.runtime.command.InvalidJobException {
    WorkerRegistryV2 workerRegistryV2 = new WorkerRegistryV2();
    // The registry is fed through the worker-event subscriber wired into the publisher.
    LifecycleEventPublisher eventPublisher = new LifecycleEventPublisherImpl(
            new AuditEventSubscriberLoggingImpl(),
            new StatusEventSubscriberLoggingImpl(),
            new DummyWorkerEventSubscriberImpl(workerRegistryV2));

    Map<StageScalingPolicy.ScalingReason, StageScalingPolicy.Strategy> smap = new HashMap<>();
    smap.put(StageScalingPolicy.ScalingReason.CPU, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(StageScalingPolicy.ScalingReason.DataDrop, new StageScalingPolicy.Strategy(StageScalingPolicy.ScalingReason.DataDrop, 0.0, 2.0, null));
    SchedulingInfo sInfo = new SchedulingInfo.Builder()
            .numberOfStages(1)
            .multiWorkerScalableStageWithConstraints(1, new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList(), new StageScalingPolicy(1, 0, 10, 1, 1, 0, smap))
            .build();

    String clusterName = "testJobScaleUp";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, eventPublisher);
    // Baseline: the registry reports two running workers right after submission.
    assertEquals(2, workerRegistryV2.getNumRunningWorkers());

    // send scale up request
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(clusterName + "-1", 1, 2, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp = probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleupResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(2, scaleResp.getActualNumWorkers());

    // Report the added worker as launched/initiated/started (per the helper's name) so it can be counted as running.
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, clusterName + "-1", 0, new WorkerId(clusterName + "-1", 1, 3));

    jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("user", new JobId(clusterName, 1)), probe.getRef());
    JobClusterManagerProto.GetJobDetailsResponse resp = probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
    Map<Integer, ? extends IMantisStageMetadata> stageMetadata = resp.getJobMetadata().get().getStageMetadata();
    assertEquals(2, stageMetadata.get(1).getAllWorkers().size());

    // Poll with a pause between attempts: the previous tight loop finished instantly and so never
    // actually waited, and it also reported failure when the count matched on the last iteration.
    final int maxPolls = 50;
    final long pollIntervalMillis = 100L;
    boolean scaledWorkerRunning = false;
    for (int attempt = 0; attempt < maxPolls; attempt++) {
        if (workerRegistryV2.getNumRunningWorkers() == 3) {
            scaledWorkerRunning = true;
            break;
        }
        Thread.sleep(pollIntervalMillis);
    }
    assertTrue(scaledWorkerRunning);
}
use of io.mantisrx.server.master.persistence.MantisJobStore in project mantis by Netflix.
the class MantisMasterAPI method main.
/**
 * Starts a local master API server on localhost:8182 backed by a file storage provider and a
 * fake scheduler, serves the v0 and v1 routes over Akka HTTP, and blocks until a byte is read
 * from standard input, after which the port is unbound and the actor system terminated.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception if setup fails or reading from standard input fails
 */
public static void main(String[] args) throws Exception {
// boot up server using the route as defined below
int port = 8182;
TestHelpers.setupMasterConfig();
ActorSystem system = ActorSystem.create("MantisMasterAPI");
// Subscribe a dedicated actor to DeadLetter events on the system event stream.
final ActorRef actor = system.actorOf(Props.create(DeadLetterActor.class));
system.eventStream().subscribe(actor, DeadLetter.class);
final Http http = Http.get(system);
final ActorMaterializer materializer = ActorMaterializer.create(system);
// Audit events: a logging subscriber sits behind the broker actor, and the Akka-backed
// subscriber handed to the lifecycle publisher is constructed around that broker actor.
final AuditEventSubscriber auditEventSubscriber = new AuditEventSubscriberLoggingImpl();
ActorRef auditEventBrokerActor = system.actorOf(AuditEventBrokerActor.props(auditEventSubscriber), "AuditEventBroker");
final AuditEventSubscriber auditEventSubscriberAkka = new AuditEventSubscriberAkkaImpl(auditEventBrokerActor);
final LifecycleEventPublisher lifecycleEventPublisher = new LifecycleEventPublisherImpl(auditEventSubscriberAkka, new StatusEventSubscriberLoggingImpl(), new WorkerEventSubscriberLoggingImpl());
// Persistence: a SimpleCachedFileStorageProvider wrapped in the storage-provider adapter.
IMantisStorageProvider storageProvider = new MantisStorageProviderAdapter(new SimpleCachedFileStorageProvider(), lifecycleEventPublisher);
// The job clusters manager is created first and then initialized with the fake scheduler.
// NOTE(review): the meaning of the boolean `true` passed to JobClustersManagerInitialize is not visible here — confirm.
ActorRef jobClustersManager = system.actorOf(JobClustersManagerActor.props(new MantisJobStore(storageProvider), lifecycleEventPublisher), "JobClustersManager");
final FakeMantisScheduler fakeScheduler = new FakeMantisScheduler(jobClustersManager);
jobClustersManager.tell(new JobClusterManagerProto.JobClustersManagerInitialize(fakeScheduler, true), ActorRef.noSender());
// Schedulers.newThread().createWorker().schedulePeriodically(() -> jobClustersManager.tell(new NullPointerException(), ActorRef.noSender()),0, 100, TimeUnit.SECONDS);
setupDummyAgentClusterAutoScaler();
final JobClusterRouteHandler jobClusterRouteHandler = new JobClusterRouteHandlerAkkaImpl(jobClustersManager);
final JobRouteHandler jobRouteHandler = new JobRouteHandlerAkkaImpl(jobClustersManager);
// NOTE(review): the numeric arguments are derived from `port` (port, port + 2, port + 4, port + 6);
// which endpoint each one denotes depends on the MasterDescription constructor — confirm.
MasterDescription masterDescription = new MasterDescription("localhost", "127.0.0.1", port, port + 2, port + 4, "api/postjobstatus", port + 6, System.currentTimeMillis());
final MasterDescriptionRoute masterDescriptionRoute = new MasterDescriptionRoute(masterDescription);
// The idle timeout is read from the actor system's configuration and later given to the discovery handler.
Duration idleTimeout = system.settings().config().getDuration("akka.http.server.idle-timeout");
logger.info("idle timeout {} sec ", idleTimeout.getSeconds());
// Status events: the status broker is created with the agents error monitor actor, which is
// then initialized with the fake scheduler.
ActorRef agentsErrorMonitorActor = system.actorOf(AgentsErrorMonitorActor.props(), "AgentsErrorMonitor");
ActorRef statusEventBrokerActor = system.actorOf(StatusEventBrokerActor.props(agentsErrorMonitorActor), "StatusEventBroker");
agentsErrorMonitorActor.tell(new AgentsErrorMonitorActor.InitializeAgentsErrorMonitor(fakeScheduler), ActorRef.noSender());
final JobStatusRouteHandler jobStatusRouteHandler = new JobStatusRouteHandlerAkkaImpl(system, statusEventBrokerActor);
final AgentClusterOperationsImpl agentClusterOperations = new AgentClusterOperationsImpl(storageProvider, new JobMessageRouterImpl(jobClustersManager), fakeScheduler, lifecycleEventPublisher, "cluster");
final JobDiscoveryRouteHandler jobDiscoveryRouteHandler = new JobDiscoveryRouteHandlerAkkaImpl(jobClustersManager, idleTimeout);
// Route objects held in v0-prefixed locals.
final JobRoute v0JobRoute = new JobRoute(jobRouteHandler, system);
final JobDiscoveryRoute v0JobDiscoveryRoute = new JobDiscoveryRoute(jobDiscoveryRouteHandler);
final JobClusterRoute v0JobClusterRoute = new JobClusterRoute(jobClusterRouteHandler, jobRouteHandler, system);
final JobStatusRoute v0JobStatusRoute = new JobStatusRoute(jobStatusRouteHandler);
final AgentClusterRoute v0AgentClusterRoute = new AgentClusterRoute(agentClusterOperations, system);
// Route objects held in v1-prefixed locals.
final JobClustersRoute v1JobClustersRoute = new JobClustersRoute(jobClusterRouteHandler, system);
final JobsRoute v1JobsRoute = new JobsRoute(jobClusterRouteHandler, jobRouteHandler, system);
final AdminMasterRoute v1AdminMasterRoute = new AdminMasterRoute(masterDescription);
final AgentClustersRoute v1AgentClustersRoute = new AgentClustersRoute(agentClusterOperations);
final JobDiscoveryStreamRoute v1JobDiscoveryStreamRoute = new JobDiscoveryStreamRoute(jobDiscoveryRouteHandler);
final LastSubmittedJobIdStreamRoute v1LastSubmittedJobIdStreamRoute = new LastSubmittedJobIdStreamRoute(jobDiscoveryRouteHandler);
final JobStatusStreamRoute v1JobStatusStreamRoute = new JobStatusStreamRoute(jobStatusRouteHandler);
// Leadership: a local monitor and a local leadership manager, both built from the same
// master description; the manager is marked leader-ready before the filter is created.
LocalMasterMonitor localMasterMonitor = new LocalMasterMonitor(masterDescription);
LeadershipManagerLocalImpl leadershipMgr = new LeadershipManagerLocalImpl(masterDescription);
leadershipMgr.setLeaderReady();
LeaderRedirectionFilter leaderRedirectionFilter = new LeaderRedirectionFilter(localMasterMonitor, leadershipMgr);
// Compose all routes into a single flow and bind it to localhost:port.
final MantisMasterRoute app = new MantisMasterRoute(leaderRedirectionFilter, masterDescriptionRoute, v0JobClusterRoute, v0JobRoute, v0JobDiscoveryRoute, v0JobStatusRoute, v0AgentClusterRoute, v1JobClustersRoute, v1JobsRoute, v1AdminMasterRoute, v1AgentClustersRoute, v1JobDiscoveryStreamRoute, v1LastSubmittedJobIdStreamRoute, v1JobStatusStreamRoute);
final Flow<HttpRequest, HttpResponse, NotUsed> routeFlow = app.createRoute().flow(system, materializer);
final CompletionStage<ServerBinding> binding = http.bindAndHandle(routeFlow, ConnectHttp.toHost("localhost", port), materializer);
// If the binding stage completes exceptionally, report it on stderr and terminate the actor system.
binding.exceptionally(failure -> {
System.err.println("Something very bad happened! " + failure.getMessage());
system.terminate();
return null;
});
// Schedulers.newThread().createWorker().schedule(() -> leadershipMgr.stopBeingLeader(), 10, TimeUnit.SECONDS);
System.out.println("Server online at http://localhost:" + port + "/\nPress RETURN to stop...");
// let it run until user presses return
System.in.read();
binding.thenCompose(// trigger unbinding from the port
ServerBinding::unbind).thenAccept(// and shutdown when done
unbound -> system.terminate());
}
use of io.mantisrx.server.master.persistence.MantisJobStore in project mantis by Netflix.
the class JobClusterTest method testJobKillTriggersSLAToLaunchNew.
/**
 * Initializes a job cluster with an SLA, submits and launches job "-1", kills it, and then
 * checks that a job with id "-2" exists in the Accepted state, i.e. that a replacement job
 * was submitted after the kill.
 *
 * <p>The scheduler mock is stubbed so that any {@code unscheduleWorker} call sends a
 * {@code WorkerTerminate} event for the job's worker back to the job cluster actor.
 */
@Test
public void testJobKillTriggersSLAToLaunchNew() {
    TestKit probe = new TestKit(system);
    String clusterName = "testJobKillTriggersSLAToLaunchNew";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    // NOTE(review): SLA(1, 1, null, null) presumably means min = max = 1 job — confirm against SLA's constructor.
    SLA sla = new SLA(1, 1, null, null);
    final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName, Lists.newArrayList(), sla);
    ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
    jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
    JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
    assertEquals(SUCCESS, createResp.responseCode);

    String jobId = clusterName + "-1";
    WorkerId workerId1 = new WorkerId(clusterName, jobId, 0, 1);
    // Simulate the worker reporting termination whenever the scheduler is asked to unschedule it.
    doAnswer(invocation -> {
        WorkerEvent terminate = new WorkerTerminate(workerId1, WorkerState.Completed, JobCompletedReason.Killed, System.currentTimeMillis());
        jobClusterActor.tell(terminate, probe.getRef());
        return null;
    }).when(schedulerMock).unscheduleWorker(any(), any());

    try {
        final JobDefinition jobDefn = createJob(clusterName, 1, MantisJobDurationType.Transient);
        JobId jId = new JobId(clusterName, 1);
        JobTestHelper.submitJobAndVerifySuccess(probe, clusterName, jobClusterActor, jobDefn, jobId);
        JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, jobId, SUCCESS, JobState.Accepted);
        // Same worker identity as the one used in the stubbed termination event above.
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobClusterActor, jobId, 1, workerId1);
        JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, jobId, SUCCESS, JobState.Launched);
        JobTestHelper.killJobAndVerify(probe, clusterName, jId, jobClusterActor);
        // Give the cluster actor time to react to the kill before looking for the replacement job.
        Thread.sleep(500);
        // a new job should have been submitted
        JobTestHelper.getJobDetailsAndVerify(probe, jobClusterActor, clusterName + "-2", SUCCESS, JobState.Accepted);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so the test runner can observe the interruption.
        Thread.currentThread().interrupt();
        throw new AssertionError("Interrupted while waiting for the replacement job of " + clusterName, e);
    } catch (Exception e) {
        // Fail with the cause attached instead of printing it to stderr and failing without context.
        throw new AssertionError("Unexpected exception in testJobKillTriggersSLAToLaunchNew", e);
    }
}
use of io.mantisrx.server.master.persistence.MantisJobStore in project mantis by Netflix.
the class JobClusterTest method testZombieWorkerKilledOnMessage.
/**
 * Sends a heartbeat for a worker of a job that was never submitted to the cluster and checks
 * that the scheduler is asked exactly once to unschedule and terminate that worker.
 */
@Test
public void testZombieWorkerKilledOnMessage() {
    String clusterName = "testZombieWorkerKilledOnMessage";
    TestKit probe = new TestKit(system);
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName);
    ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
    jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
    JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
    assertEquals(SUCCESS, createResp.responseCode);

    try {
        // No job is submitted in this test, so this worker belongs to a job the cluster does not know.
        String jobId = clusterName + "-1";
        WorkerId workerId = new WorkerId(clusterName, jobId, 0, 1);
        WorkerEvent heartBeat2 = new WorkerHeartbeat(new Status(jobId, 1, workerId.getWorkerIndex(), workerId.getWorkerNum(), TYPE.HEARTBEAT, "", MantisJobState.Started, System.currentTimeMillis()));
        jobClusterActor.tell(heartBeat2, probe.getRef());

        // Request/response round trip with the actor before verifying the mock interaction.
        jobClusterActor.tell(new GetJobClusterRequest(clusterName), probe.getRef());
        GetJobClusterResponse resp = probe.expectMsgClass(GetJobClusterResponse.class);
        assertEquals(clusterName, resp.getJobCluster().get().getName());

        verify(schedulerMock, times(1)).unscheduleAndTerminateWorker(workerId, empty());
    } catch (Exception e) {
        // Fail with the cause attached instead of printing it to stderr and failing without context.
        throw new AssertionError("Unexpected exception in testZombieWorkerKilledOnMessage", e);
    }
}
use of io.mantisrx.server.master.persistence.MantisJobStore in project mantis by Netflix.
the class JobClusterTest method testJobSubmitWithNoSchedInfoUsesJobClusterValues.
/**
 * Submits two jobs to a cluster that defines one label. The first job declares no labels and is
 * expected to carry the cluster's label in addition to the mandatory labels. The second job
 * declares its own label and is expected to carry that label in addition to the mandatory ones.
 */
@Test
public void testJobSubmitWithNoSchedInfoUsesJobClusterValues() {
    TestKit probe = new TestKit(system);
    String clusterName = "testJobSubmitWithNoSchedInfoUsesJobClusterValues";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    List<Label> clusterLabels = new ArrayList<>();
    Label label = new Label("clabelName", "cLabelValue");
    clusterLabels.add(label);
    final JobClusterDefinitionImpl fakeJobCluster = createFakeJobClusterDefn(clusterName, clusterLabels);
    ActorRef jobClusterActor = system.actorOf(props(clusterName, jobStoreMock, schedulerMock, eventPublisher));
    jobClusterActor.tell(new JobClusterProto.InitializeJobClusterRequest(fakeJobCluster, user, probe.getRef()), probe.getRef());
    JobClusterProto.InitializeJobClusterResponse createResp = probe.expectMsgClass(JobClusterProto.InitializeJobClusterResponse.class);
    assertEquals(SUCCESS, createResp.responseCode);

    try {
        // First job: no labels of its own.
        final JobDefinition jobDefn = new JobDefinition.Builder().withName(clusterName).withVersion("0.0.1").withSubscriptionTimeoutSecs(0).withUser("njoshi").build();
        String jobId = clusterName + "-1";
        JobTestHelper.submitJobAndVerifySuccess(probe, clusterName, jobClusterActor, jobDefn, jobId);
        jobClusterActor.tell(new GetJobDetailsRequest("nj", JobId.fromId(jobId).get()), probe.getRef());
        GetJobDetailsResponse detailsResp = probe.expectMsgClass(GetJobDetailsResponse.class);
        assertEquals(SUCCESS, detailsResp.responseCode);
        assertEquals(JobState.Accepted, detailsResp.getJobMetadata().get().getState());
        // Expected labels: the cluster's labels plus the mandatory ones.
        assertEquals(clusterLabels.size() + LabelManager.numberOfMandatoryLabels(), detailsResp.getJobMetadata().get().getLabels().size());
        // confirm that the clusters labels got inherited
        assertEquals(1, detailsResp.getJobMetadata().get().getLabels().stream().filter(l -> l.getName().equals("clabelName")).count());

        // Now submit another one with labels, it should not inherit cluster labels
        Label jobLabel = new Label("jobLabel", "jobValue");
        List<Label> jobLabelList = new ArrayList<>();
        jobLabelList.add(jobLabel);
        final JobDefinition jobDefn2 = new JobDefinition.Builder().withName(clusterName).withVersion("0.0.1").withLabels(jobLabelList).withSubscriptionTimeoutSecs(0).withUser("njoshi").build();
        String jobId2 = clusterName + "-2";
        JobTestHelper.submitJobAndVerifySuccess(probe, clusterName, jobClusterActor, jobDefn2, jobId2);
        jobClusterActor.tell(new GetJobDetailsRequest("nj", JobId.fromId(jobId2).get()), probe.getRef());
        GetJobDetailsResponse detailsResp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
        assertEquals(SUCCESS, detailsResp2.responseCode);
        assertEquals(JobState.Accepted, detailsResp2.getJobMetadata().get().getState());
        // Expected labels: the job's own labels plus two more.
        // NOTE(review): the literal 2 presumably equals LabelManager.numberOfMandatoryLabels() — confirm.
        assertEquals(jobLabelList.size() + 2, detailsResp2.getJobMetadata().get().getLabels().size());
        // confirm that the job's own label is present on the submitted job
        assertEquals(1, detailsResp2.getJobMetadata().get().getLabels().stream().filter(l -> l.getName().equals(jobLabel.getName())).count());
    } catch (Exception e) {
        // Fail with the cause attached instead of printing it to stderr and failing without context.
        throw new AssertionError("Unexpected exception in testJobSubmitWithNoSchedInfoUsesJobClusterValues", e);
    }
}
Aggregations