Use of io.mantisrx.master.events.LifecycleEventPublisher in project mantis by Netflix.
From class AgentClustersRouteTest, method setup.
@BeforeClass
public static void setup() throws InterruptedException {
    TestHelpers.setupMasterConfig();
    final CountDownLatch latch = new CountDownLatch(1);
    t = new Thread(() -> {
        try {
            // boot up a test server using the route defined below
            final Http http = Http.get(system);
            final ActorMaterializer materializer = ActorMaterializer.create(system);
            IMantisStorageProvider storageProvider = new SimpleCachedFileStorageProvider(true);
            final LifecycleEventPublisher lifecycleEventPublisher = new LifecycleEventPublisherImpl(
                    new AuditEventSubscriberLoggingImpl(),
                    new StatusEventSubscriberLoggingImpl(),
                    new WorkerEventSubscriberLoggingImpl());
            ActorRef jobClustersManagerActor = system.actorOf(
                    JobClustersManagerActor.props(new MantisJobStore(storageProvider), lifecycleEventPublisher),
                    "jobClustersManager");
            MantisScheduler fakeScheduler = new FakeMantisScheduler(jobClustersManagerActor);
            jobClustersManagerActor.tell(
                    new JobClusterManagerProto.JobClustersManagerInitialize(fakeScheduler, false),
                    ActorRef.noSender());
            setupDummyAgentClusterAutoScaler();
            final AgentClustersRoute agentClusterV2Route = new AgentClustersRoute(
                    new AgentClusterOperationsImpl(
                            storageProvider,
                            new JobMessageRouterImpl(jobClustersManagerActor),
                            fakeScheduler,
                            lifecycleEventPublisher,
                            "cluster"));
            final Flow<HttpRequest, HttpResponse, NotUsed> routeFlow =
                    agentClusterV2Route.createRoute(Function.identity()).flow(system, materializer);
            logger.info("test server starting on port {}", serverPort);
            binding = http.bindAndHandle(routeFlow, ConnectHttp.toHost("localhost", serverPort), materializer);
            // release the main thread only after the bind has been initiated
            latch.countDown();
        } catch (Exception e) {
            logger.error("caught exception", e);
            latch.countDown();
        }
    });
    t.setDaemon(true);
    t.start();
    latch.await();
}
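The matching teardown is not part of this snippet; a minimal sketch, assuming binding is the CompletionStage<ServerBinding> assigned above and system is the test's shared ActorSystem:

@AfterClass
public static void teardown() {
    // unbind the port, then stop the actor system (sketch; field names as assumed above)
    binding.thenCompose(ServerBinding::unbind)
           .whenComplete((unbound, failure) -> system.terminate());
}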
Use of io.mantisrx.master.events.LifecycleEventPublisher in project mantis by Netflix.
From class JobRouteTest, method setup.
@BeforeClass
public static void setup() throws Exception {
    JobTestHelper.deleteAllFiles();
    JobTestHelper.createDirsIfRequired();
    final CountDownLatch latch = new CountDownLatch(1);
    TestHelpers.setupMasterConfig();
    t = new Thread(() -> {
        try {
            // boot up a test server using the route defined below
            final Http http = Http.get(system);
            final ActorMaterializer materializer = ActorMaterializer.create(system);
            // Alternative storage setup, kept for reference:
            // SimpleCachedFileStorageProvider simpleCachedFileStorageProvider = new SimpleCachedFileStorageProvider();
            // new File("/tmp/MantisSpool/namedJobs").mkdirs();
            // IMantisStorageProvider storageProvider = new MantisStorageProviderAdapter(simpleCachedFileStorageProvider);
            final LifecycleEventPublisher lifecycleEventPublisher = new LifecycleEventPublisherImpl(
                    new AuditEventSubscriberLoggingImpl(),
                    new StatusEventSubscriberLoggingImpl(),
                    new WorkerEventSubscriberLoggingImpl());
            ActorRef jobClustersManagerActor = system.actorOf(
                    JobClustersManagerActor.props(
                            new MantisJobStore(new io.mantisrx.server.master.persistence.SimpleCachedFileStorageProvider(true)),
                            lifecycleEventPublisher),
                    "jobClustersManager");
            MantisScheduler fakeScheduler = new FakeMantisScheduler(jobClustersManagerActor);
            jobClustersManagerActor.tell(
                    new JobClusterManagerProto.JobClustersManagerInitialize(fakeScheduler, false),
                    ActorRef.noSender());
            final JobClusterRouteHandler jobClusterRouteHandler = new JobClusterRouteHandlerAkkaImpl(jobClustersManagerActor);
            final JobRouteHandler jobRouteHandler = new JobRouteHandlerAkkaImpl(jobClustersManagerActor);
            MasterDescription masterDescription = new MasterDescription(
                    "127.0.0.1", "127.0.0.1", serverPort, serverPort, serverPort,
                    "api/postjobstatus", serverPort, System.currentTimeMillis());
            Duration idleTimeout = system.settings().config().getDuration("akka.http.server.idle-timeout");
            logger.info("idle timeout {} sec", idleTimeout.getSeconds());
            final JobDiscoveryRouteHandler jobDiscoveryRouteHandler = new JobDiscoveryRouteHandlerAkkaImpl(jobClustersManagerActor, idleTimeout);
            // v0 and v1 routes, wired against the same handlers
            final MasterDescriptionRoute masterDescriptionRoute = new MasterDescriptionRoute(masterDescription);
            final JobRoute v0JobRoute = new JobRoute(jobRouteHandler, system);
            final JobDiscoveryRoute v0JobDiscoveryRoute = new JobDiscoveryRoute(jobDiscoveryRouteHandler);
            final JobClusterRoute v0JobClusterRoute = new JobClusterRoute(jobClusterRouteHandler, jobRouteHandler, system);
            final JobClustersRoute v1JobClusterRoute = new JobClustersRoute(jobClusterRouteHandler, system);
            final JobsRoute v1JobsRoute = new JobsRoute(jobClusterRouteHandler, jobRouteHandler, system);
            final AdminMasterRoute v1AdminMasterRoute = new AdminMasterRoute(masterDescription);
            final JobStatusRouteHandler jobStatusRouteHandler = mock(JobStatusRouteHandler.class);
            when(jobStatusRouteHandler.jobStatus(anyString())).thenReturn(Flow.create());
            final JobStatusRoute v0JobStatusRoute = new JobStatusRoute(jobStatusRouteHandler);
            final AgentClusterOperations mockAgentClusterOps = mock(AgentClusterOperations.class);
            final AgentClusterRoute v0AgentClusterRoute = new AgentClusterRoute(mockAgentClusterOps, system);
            final AgentClustersRoute v1AgentClusterRoute = new AgentClustersRoute(mockAgentClusterOps);
            final JobDiscoveryStreamRoute v1JobDiscoveryStreamRoute = new JobDiscoveryStreamRoute(jobDiscoveryRouteHandler);
            final LastSubmittedJobIdStreamRoute v1LastSubmittedJobIdStreamRoute = new LastSubmittedJobIdStreamRoute(jobDiscoveryRouteHandler);
            final JobStatusStreamRoute v1JobStatusStreamRoute = new JobStatusStreamRoute(jobStatusRouteHandler);
            LocalMasterMonitor localMasterMonitor = new LocalMasterMonitor(masterDescription);
            LeadershipManagerLocalImpl leadershipMgr = new LeadershipManagerLocalImpl(masterDescription);
            leadershipMgr.setLeaderReady();
            LeaderRedirectionFilter leaderRedirectionFilter = new LeaderRedirectionFilter(localMasterMonitor, leadershipMgr);
            final MantisMasterRoute app = new MantisMasterRoute(
                    leaderRedirectionFilter, masterDescriptionRoute,
                    v0JobClusterRoute, v0JobRoute, v0JobDiscoveryRoute, v0JobStatusRoute, v0AgentClusterRoute,
                    v1JobClusterRoute, v1JobsRoute, v1AdminMasterRoute, v1AgentClusterRoute,
                    v1JobDiscoveryStreamRoute, v1LastSubmittedJobIdStreamRoute, v1JobStatusStreamRoute);
            final Flow<HttpRequest, HttpResponse, NotUsed> routeFlow = app.createRoute().flow(system, materializer);
            logger.info("starting test server on port {}", serverPort);
            binding = http.bindAndHandle(routeFlow, ConnectHttp.toHost("localhost", serverPort), materializer);
            latch.countDown();
        } catch (Exception e) {
            logger.error("caught exception", e);
            latch.countDown();
        }
    });
    t.setDaemon(true);
    t.start();
    latch.await();
    // binding is asynchronous; give the server a moment to finish binding
    Thread.sleep(100);
}
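The trailing Thread.sleep(100) only papers over the asynchronous bind. Since the latch is counted down after binding is assigned, the test could block on the binding future itself instead; a sketch, assuming binding is a CompletionStage<ServerBinding> field:

    // wait deterministically for the server socket instead of sleeping
    binding.toCompletableFuture().get(5, TimeUnit.SECONDS);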
Use of io.mantisrx.master.events.LifecycleEventPublisher in project mantis by Netflix.
From class JobClusterRouteTest, method setup.
@BeforeClass
public static void setup() throws Exception {
    TestHelpers.setupMasterConfig();
    final CountDownLatch latch = new CountDownLatch(1);
    t = new Thread(() -> {
        try {
            // boot up a test server using the route defined below
            final Http http = Http.get(system);
            final ActorMaterializer materializer = ActorMaterializer.create(system);
            final LifecycleEventPublisher lifecycleEventPublisher = new LifecycleEventPublisherImpl(
                    new AuditEventSubscriberLoggingImpl(),
                    new StatusEventSubscriberLoggingImpl(),
                    new WorkerEventSubscriberLoggingImpl());
            ActorRef jobClustersManagerActor = system.actorOf(
                    JobClustersManagerActor.props(new MantisJobStore(new SimpleCachedFileStorageProvider(true)), lifecycleEventPublisher),
                    "jobClustersManager");
            MantisScheduler fakeScheduler = new FakeMantisScheduler(jobClustersManagerActor);
            jobClustersManagerActor.tell(
                    new JobClusterManagerProto.JobClustersManagerInitialize(fakeScheduler, false),
                    ActorRef.noSender());
            final JobClusterRouteHandler jobClusterRouteHandler = new JobClusterRouteHandlerAkkaImpl(jobClustersManagerActor);
            final JobRouteHandler jobRouteHandler = new JobRouteHandlerAkkaImpl(jobClustersManagerActor);
            final JobClusterRoute app = new JobClusterRoute(jobClusterRouteHandler, jobRouteHandler, system);
            final Flow<HttpRequest, HttpResponse, NotUsed> routeFlow =
                    app.createRoute(Function.identity()).flow(system, materializer);
            logger.info("starting test server on port {}", serverPort);
            binding = http.bindAndHandle(routeFlow, ConnectHttp.toHost("localhost", serverPort), materializer);
            latch.countDown();
        } catch (Exception e) {
            logger.error("caught exception", e);
            latch.countDown();
        }
    });
    t.setDaemon(true);
    t.start();
    latch.await();
}
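Each of the three test setups above builds its LifecycleEventPublisher from the same trio of logging subscribers; a small helper (hypothetical, for illustration only) would keep that wiring in one place:

static LifecycleEventPublisher loggingLifecycleEventPublisher() {
    // the same three logging subscribers used by each setup above (helper name is hypothetical)
    return new LifecycleEventPublisherImpl(
            new AuditEventSubscriberLoggingImpl(),
            new StatusEventSubscriberLoggingImpl(),
            new WorkerEventSubscriberLoggingImpl());
}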
Use of io.mantisrx.master.events.LifecycleEventPublisher in project mantis by Netflix.
From class MantisMasterAPI, method main.
public static void main(String[] args) throws Exception {
    // boot up a server using the route defined below
    int port = 8182;
    TestHelpers.setupMasterConfig();
    ActorSystem system = ActorSystem.create("MantisMasterAPI");
    // subscribe to dead letters so lost messages are visible in the logs
    final ActorRef actor = system.actorOf(Props.create(DeadLetterActor.class));
    system.eventStream().subscribe(actor, DeadLetter.class);
    final Http http = Http.get(system);
    final ActorMaterializer materializer = ActorMaterializer.create(system);
    final AuditEventSubscriber auditEventSubscriber = new AuditEventSubscriberLoggingImpl();
    ActorRef auditEventBrokerActor = system.actorOf(AuditEventBrokerActor.props(auditEventSubscriber), "AuditEventBroker");
    final AuditEventSubscriber auditEventSubscriberAkka = new AuditEventSubscriberAkkaImpl(auditEventBrokerActor);
    final LifecycleEventPublisher lifecycleEventPublisher = new LifecycleEventPublisherImpl(
            auditEventSubscriberAkka,
            new StatusEventSubscriberLoggingImpl(),
            new WorkerEventSubscriberLoggingImpl());
    IMantisStorageProvider storageProvider = new MantisStorageProviderAdapter(new SimpleCachedFileStorageProvider(), lifecycleEventPublisher);
    ActorRef jobClustersManager = system.actorOf(
            JobClustersManagerActor.props(new MantisJobStore(storageProvider), lifecycleEventPublisher),
            "JobClustersManager");
    final FakeMantisScheduler fakeScheduler = new FakeMantisScheduler(jobClustersManager);
    jobClustersManager.tell(new JobClusterManagerProto.JobClustersManagerInitialize(fakeScheduler, true), ActorRef.noSender());
    // Schedulers.newThread().createWorker().schedulePeriodically(() -> jobClustersManager.tell(new NullPointerException(), ActorRef.noSender()), 0, 100, TimeUnit.SECONDS);
    setupDummyAgentClusterAutoScaler();
    final JobClusterRouteHandler jobClusterRouteHandler = new JobClusterRouteHandlerAkkaImpl(jobClustersManager);
    final JobRouteHandler jobRouteHandler = new JobRouteHandlerAkkaImpl(jobClustersManager);
    MasterDescription masterDescription = new MasterDescription(
            "localhost", "127.0.0.1", port, port + 2, port + 4,
            "api/postjobstatus", port + 6, System.currentTimeMillis());
    final MasterDescriptionRoute masterDescriptionRoute = new MasterDescriptionRoute(masterDescription);
    Duration idleTimeout = system.settings().config().getDuration("akka.http.server.idle-timeout");
    logger.info("idle timeout {} sec", idleTimeout.getSeconds());
    ActorRef agentsErrorMonitorActor = system.actorOf(AgentsErrorMonitorActor.props(), "AgentsErrorMonitor");
    ActorRef statusEventBrokerActor = system.actorOf(StatusEventBrokerActor.props(agentsErrorMonitorActor), "StatusEventBroker");
    agentsErrorMonitorActor.tell(new AgentsErrorMonitorActor.InitializeAgentsErrorMonitor(fakeScheduler), ActorRef.noSender());
    final JobStatusRouteHandler jobStatusRouteHandler = new JobStatusRouteHandlerAkkaImpl(system, statusEventBrokerActor);
    final AgentClusterOperationsImpl agentClusterOperations = new AgentClusterOperationsImpl(
            storageProvider,
            new JobMessageRouterImpl(jobClustersManager),
            fakeScheduler,
            lifecycleEventPublisher,
            "cluster");
    final JobDiscoveryRouteHandler jobDiscoveryRouteHandler = new JobDiscoveryRouteHandlerAkkaImpl(jobClustersManager, idleTimeout);
    // v0 and v1 routes, wired against the same handlers
    final JobRoute v0JobRoute = new JobRoute(jobRouteHandler, system);
    final JobDiscoveryRoute v0JobDiscoveryRoute = new JobDiscoveryRoute(jobDiscoveryRouteHandler);
    final JobClusterRoute v0JobClusterRoute = new JobClusterRoute(jobClusterRouteHandler, jobRouteHandler, system);
    final JobStatusRoute v0JobStatusRoute = new JobStatusRoute(jobStatusRouteHandler);
    final AgentClusterRoute v0AgentClusterRoute = new AgentClusterRoute(agentClusterOperations, system);
    final JobClustersRoute v1JobClustersRoute = new JobClustersRoute(jobClusterRouteHandler, system);
    final JobsRoute v1JobsRoute = new JobsRoute(jobClusterRouteHandler, jobRouteHandler, system);
    final AdminMasterRoute v1AdminMasterRoute = new AdminMasterRoute(masterDescription);
    final AgentClustersRoute v1AgentClustersRoute = new AgentClustersRoute(agentClusterOperations);
    final JobDiscoveryStreamRoute v1JobDiscoveryStreamRoute = new JobDiscoveryStreamRoute(jobDiscoveryRouteHandler);
    final LastSubmittedJobIdStreamRoute v1LastSubmittedJobIdStreamRoute = new LastSubmittedJobIdStreamRoute(jobDiscoveryRouteHandler);
    final JobStatusStreamRoute v1JobStatusStreamRoute = new JobStatusStreamRoute(jobStatusRouteHandler);
    LocalMasterMonitor localMasterMonitor = new LocalMasterMonitor(masterDescription);
    LeadershipManagerLocalImpl leadershipMgr = new LeadershipManagerLocalImpl(masterDescription);
    leadershipMgr.setLeaderReady();
    LeaderRedirectionFilter leaderRedirectionFilter = new LeaderRedirectionFilter(localMasterMonitor, leadershipMgr);
    final MantisMasterRoute app = new MantisMasterRoute(
            leaderRedirectionFilter, masterDescriptionRoute,
            v0JobClusterRoute, v0JobRoute, v0JobDiscoveryRoute, v0JobStatusRoute, v0AgentClusterRoute,
            v1JobClustersRoute, v1JobsRoute, v1AdminMasterRoute, v1AgentClustersRoute,
            v1JobDiscoveryStreamRoute, v1LastSubmittedJobIdStreamRoute, v1JobStatusStreamRoute);
    final Flow<HttpRequest, HttpResponse, NotUsed> routeFlow = app.createRoute().flow(system, materializer);
    final CompletionStage<ServerBinding> binding = http.bindAndHandle(routeFlow, ConnectHttp.toHost("localhost", port), materializer);
    binding.exceptionally(failure -> {
        System.err.println("Something very bad happened! " + failure.getMessage());
        system.terminate();
        return null;
    });
    // Schedulers.newThread().createWorker().schedule(() -> leadershipMgr.stopBeingLeader(), 10, TimeUnit.SECONDS);
    System.out.println("Server online at http://localhost:" + port + "/\nPress RETURN to stop...");
    // let it run until the user presses return
    System.in.read();
    binding.thenCompose(ServerBinding::unbind) // trigger unbinding from the port
           .thenAccept(unbound -> system.terminate()); // and shut down when done
}
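The commented-out Schedulers line above hints at how leader redirection could be exercised while the server is running; spelled out as a sketch (assuming RxJava 1's rx.schedulers.Schedulers, which the commented code implies):

    // relinquish leadership after 10 seconds; the LeaderRedirectionFilter should then
    // start redirecting API calls away from this instance
    Schedulers.newThread().createWorker()
            .schedule(() -> leadershipMgr.stopBeingLeader(), 10, TimeUnit.SECONDS);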
Use of io.mantisrx.master.events.LifecycleEventPublisher in project mantis by Netflix.
From class JobScaleUpDownTests, method testSchedulingInfo.
// TODO: fix timing issues, then re-enable this test
// @Test
public void testSchedulingInfo() throws Exception {
    CountDownLatch latch = new CountDownLatch(11);
    List<JobSchedulingInfo> schedulingChangesList = new CopyOnWriteArrayList<>();
    final TestKit probe = new TestKit(system);
    Map<ScalingReason, Strategy> smap = new HashMap<>();
    smap.put(ScalingReason.CPU, new Strategy(ScalingReason.CPU, 0.5, 0.75, null));
    smap.put(ScalingReason.DataDrop, new Strategy(ScalingReason.DataDrop, 0.0, 2.0, null));
    SchedulingInfo sInfo = new SchedulingInfo.Builder()
            .numberOfStages(1)
            .multiWorkerScalableStageWithConstraints(
                    1,
                    new MachineDefinition(1.0, 1.0, 1.0, 3),
                    Lists.newArrayList(),
                    Lists.newArrayList(),
                    new StageScalingPolicy(1, 0, 10, 1, 1, 0, smap))
            .build();
    String clusterName = "testSchedulingInfo";
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    MantisJobStore jobStoreMock = mock(MantisJobStore.class);
    CountDownLatch worker1Started = new CountDownLatch(1);
    ActorRef jobActor = JobTestHelper.submitSingleStageScalableJob(system, probe, clusterName, sInfo, schedulerMock, jobStoreMock, lifecycleEventPublisher);
    JobId jobId = new JobId(clusterName, 1);
    JobClusterManagerProto.GetJobSchedInfoRequest getJobSchedInfoRequest = new JobClusterManagerProto.GetJobSchedInfoRequest(jobId);
    jobActor.tell(getJobSchedInfoRequest, probe.getRef());
    JobClusterManagerProto.GetJobSchedInfoResponse resp = probe.expectMsgClass(JobClusterManagerProto.GetJobSchedInfoResponse.class);
    assertEquals(SUCCESS, resp.responseCode);
    assertTrue(resp.getJobSchedInfoSubject().isPresent());
    ObjectMapper mapper = new ObjectMapper();
    BehaviorSubject<JobSchedulingInfo> jobSchedulingInfoBehaviorSubject = resp.getJobSchedInfoSubject().get();
    jobSchedulingInfoBehaviorSubject
            .doOnNext((js) -> System.out.println("Got --> " + js.toString()))
            .map((e) -> {
                try {
                    return mapper.writeValueAsString(e);
                } catch (JsonProcessingException e1) {
                    e1.printStackTrace();
                    return "{\"error\":" + e1.getMessage() + "}";
                }
            })
            .map((js) -> {
                try {
                    return mapper.readValue(js, JobSchedulingInfo.class);
                } catch (IOException e) {
                    e.printStackTrace();
                    return null;
                }
            })
            .filter((j) -> j != null)
            .doOnNext((js) -> {
                // Map<Integer, WorkerAssignments> workerAssignments = js.getWorkerAssignments();
                // WorkerAssignments workerAssignments1 = workerAssignments.get(1);
                // assertEquals(1, workerAssignments1.getNumWorkers());
                // Map<Integer, WorkerHost> hosts = workerAssignments1.getHosts();
                // // make sure worker number 1 exists
                // assertTrue(hosts.containsKey(1));
            })
            .doOnCompleted(() -> {
                System.out.println("SchedulingInfo completed");
                System.out.println(schedulingChangesList.size() + " Sched changes received");
            })
            .observeOn(Schedulers.io())
            .subscribe((js) -> {
                latch.countDown();
                schedulingChangesList.add(js);
            });
    // send scale up request
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(jobId.getId(), 1, 2, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleResp = probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleupResp " + scaleResp.message);
    assertEquals(SUCCESS, scaleResp.responseCode);
    assertEquals(2, scaleResp.getActualNumWorkers());
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId.getId(), 1, new WorkerId(jobId.getId(), 1, 3));
    // worker gets lost
    JobTestHelper.sendWorkerTerminatedEvent(probe, jobActor, jobId.getId(), new WorkerId(jobId.getId(), 1, 3));
    // send replacement worker messages
    JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId.getId(), 1, new WorkerId(jobId.getId(), 1, 4));
    // scale down
    jobActor.tell(new JobClusterManagerProto.ScaleStageRequest(jobId.getId(), 1, 1, "", ""), probe.getRef());
    JobClusterManagerProto.ScaleStageResponse scaleDownResp = probe.expectMsgClass(JobClusterManagerProto.ScaleStageResponse.class);
    System.out.println("ScaleDownResp " + scaleDownResp.message);
    assertEquals(SUCCESS, scaleDownResp.responseCode);
    assertEquals(1, scaleDownResp.getActualNumWorkers());
    // kill job
    jobActor.tell(new JobClusterProto.KillJobRequest(jobId, "killed", JobCompletedReason.Killed, "test", probe.getRef()), probe.getRef());
    probe.expectMsgClass(JobClusterProto.KillJobResponse.class);
    for (JobSchedulingInfo jobSchedulingInfo : schedulingChangesList) {
        System.out.println(jobSchedulingInfo);
    }
    /*
    Expected sequence of scheduling changes:
    SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
        0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
        1=WorkerAssignments [stage=1, numWorkers=1, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}]}]
    SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
        0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
        1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}]}]
    SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
        0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
        1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]],
            3=WorkerHost [state=Launched, workerIndex=1, host=host1, port=[9020]]}]}]
    SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
        0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
        1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]],
            3=WorkerHost [state=StartInitiated, workerIndex=1, host=host1, port=[9020]]}]}]
    SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
        0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
        1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]],
            3=WorkerHost [state=Started, workerIndex=1, host=host1, port=[9020]]}]}]
    SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
        0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
        1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}]}]
    SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
        0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
        1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]],
            4=WorkerHost [state=Launched, workerIndex=1, host=host1, port=[9020]]}]}]
    SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
        0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
        1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]],
            4=WorkerHost [state=StartInitiated, workerIndex=1, host=host1, port=[9020]]}]}]
    SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
        0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
        1=WorkerAssignments [stage=1, numWorkers=2, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]],
            4=WorkerHost [state=Started, workerIndex=1, host=host1, port=[9020]]}]}]
    SchedulingChange [jobId=testSchedulingInfo-1, workerAssignments={
        0=WorkerAssignments [stage=0, numWorkers=1, hosts={1=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}],
        1=WorkerAssignments [stage=1, numWorkers=1, hosts={2=WorkerHost [state=Started, workerIndex=0, host=host1, port=[9020]]}]}]
    */
    latch.await(1000, TimeUnit.SECONDS);
    System.out.println("---->Verifying scheduling changes " + schedulingChangesList.size());
    assertEquals(11, schedulingChangesList.size());
    for (int i = 0; i < schedulingChangesList.size(); i++) {
        JobSchedulingInfo js = schedulingChangesList.get(i);
        // jobid is correct
        assertEquals(jobId.getId(), js.getJobId());
        Map<Integer, WorkerAssignments> workerAssignments = js.getWorkerAssignments();
        // has info about stage 1
        System.out.println("WorkerAssignments -> " + workerAssignments);
        // assertTrue(workerAssignments.containsKey(1));
        switch (i) {
            case 0:
                WorkerAssignments wa0 = workerAssignments.get(1);
                assertEquals(1, wa0.getNumWorkers());
                Map<Integer, WorkerHost> hosts0 = wa0.getHosts();
                // make sure worker number 2 exists
                validateHost(hosts0, 0, 2, MantisJobState.Started);
                break;
            // scale up by 1
            case 1:
                WorkerAssignments wa1 = workerAssignments.get(1);
                assertEquals(2, wa1.getNumWorkers());
                Map<Integer, WorkerHost> hosts1 = wa1.getHosts();
                assertEquals(1, hosts1.size());
                // first update has only numWorkers updated but the new worker is still in Accepted state, so no host entry for it
                validateHost(hosts1, 0, 2, MantisJobState.Started);
                assertFalse(hosts1.containsKey(3));
                break;
            case 2:
                WorkerAssignments wa2 = workerAssignments.get(1);
                assertEquals(2, wa2.getNumWorkers());
                Map<Integer, WorkerHost> hosts2 = wa2.getHosts();
                assertEquals(2, hosts2.size());
                // next update should have both numWorkers and the new worker in Launched state
                validateHost(hosts2, 0, 2, MantisJobState.Started);
                validateHost(hosts2, 1, 3, MantisJobState.Launched);
                break;
            case 3:
                WorkerAssignments wa3 = workerAssignments.get(1);
                assertEquals(2, wa3.getNumWorkers());
                Map<Integer, WorkerHost> hosts3 = wa3.getHosts();
                assertEquals(2, hosts3.size());
                // this update is for new worker in StartInit state
                validateHost(hosts3, 0, 2, MantisJobState.Started);
                validateHost(hosts3, 1, 3, MantisJobState.StartInitiated);
                break;
            case 4:
                WorkerAssignments wa4 = workerAssignments.get(1);
                assertEquals(2, wa4.getNumWorkers());
                Map<Integer, WorkerHost> hosts4 = wa4.getHosts();
                assertEquals(2, hosts4.size());
                // this update is for new worker in Started state
                validateHost(hosts4, 0, 2, MantisJobState.Started);
                validateHost(hosts4, 1, 3, MantisJobState.Started);
                break;
            case 5:
                // worker 3 is lost and should be resubmitted
                WorkerAssignments wa5 = workerAssignments.get(1);
                assertEquals(2, wa5.getNumWorkers());
                Map<Integer, WorkerHost> hosts5 = wa5.getHosts();
                assertEquals(1, hosts5.size());
                validateHost(hosts5, 0, 2, MantisJobState.Started);
                assertFalse(hosts5.containsKey(3));
                break;
            case 6:
                // worker 3 is replaced by worker num 4
                WorkerAssignments wa6 = workerAssignments.get(1);
                assertEquals(2, wa6.getNumWorkers());
                Map<Integer, WorkerHost> hosts6 = wa6.getHosts();
                // this update should have both numWorkers and the new worker in Launched state
                assertEquals(2, hosts6.size());
                validateHost(hosts6, 0, 2, MantisJobState.Started);
                validateHost(hosts6, 1, 4, MantisJobState.Launched);
                break;
            case 7:
                WorkerAssignments wa7 = workerAssignments.get(1);
                assertEquals(2, wa7.getNumWorkers());
                Map<Integer, WorkerHost> hosts7 = wa7.getHosts();
                // update for new worker in StartInit state
                assertEquals(2, hosts7.size());
                validateHost(hosts7, 0, 2, MantisJobState.Started);
                validateHost(hosts7, 1, 4, MantisJobState.StartInitiated);
                break;
            case 8:
                WorkerAssignments wa8 = workerAssignments.get(1);
                assertEquals(2, wa8.getNumWorkers());
                Map<Integer, WorkerHost> hosts8 = wa8.getHosts();
                // update for new worker in Started state
                assertEquals(2, hosts8.size());
                validateHost(hosts8, 0, 2, MantisJobState.Started);
                validateHost(hosts8, 1, 4, MantisJobState.Started);
                break;
            case 9:
                // scale down, worker 4 should be gone now and numWorkers set to 1
                WorkerAssignments wa9 = workerAssignments.get(1);
                assertEquals(1, wa9.getNumWorkers());
                Map<Integer, WorkerHost> hosts9 = wa9.getHosts();
                assertTrue(hosts9.containsKey(2));
                assertEquals(1, hosts9.size());
                validateHost(hosts9, 0, 2, MantisJobState.Started);
                break;
            case 10:
                // job has been killed
                assertTrue(workerAssignments.isEmpty());
                break;
            default:
                fail();
        }
    }
    // verify(jobStoreMock, times(1)).storeNewJob(any());
    // // initial worker
    // verify(jobStoreMock, times(1)).storeNewWorkers(any(), any());
    // // scale up worker
    // verify(jobStoreMock, times(1)).storeNewWorker(any());
    // // verify(jobStoreMock, times(17)).updateWorker(any());
    // verify(jobStoreMock, times(3)).updateJob(any());
    // // initial worker + job master and scale up worker + resubmit
    // verify(schedulerMock, times(4)).scheduleWorker(any());
    // verify(schedulerMock, times(4)).unscheduleAndTerminateWorker(any(), any());
}
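The validateHost helper invoked throughout the switch above is not part of this snippet; a minimal sketch of what it presumably asserts, with the signature inferred from the call sites and the WorkerHost getter names assumed from its toString output:

private void validateHost(Map<Integer, WorkerHost> hosts, int workerIndex, int workerNum, MantisJobState state) {
    // worker `workerNum` must be present with the expected index and state
    // (getter names are assumptions based on the WorkerHost toString output above)
    assertTrue(hosts.containsKey(workerNum));
    assertEquals(workerIndex, hosts.get(workerNum).getWorkerIndex());
    assertEquals(state, hosts.get(workerNum).getState());
}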