Search in sources:

Example 6 with MantisStorageProviderAdapter

use of io.mantisrx.server.master.persistence.MantisStorageProviderAdapter in project mantis by Netflix.

In class JobTestLifecycle, method setup.

/**
 * One-time fixture initialisation for the test class.
 *
 * <p>Creates the actor system, applies the master test configuration via
 * {@code TestHelpers.setupMasterConfig()}, and then builds the job store on top of a
 * {@code SimpleCachedFileStorageProvider} wrapped in a {@code MantisStorageProviderAdapter}.
 */
@BeforeClass
public static void setup() {
    system = ActorSystem.create();
    TestHelpers.setupMasterConfig();

    // Build the storage stack bottom-up: file-backed provider -> adapter -> job store.
    io.mantisrx.server.master.store.SimpleCachedFileStorageProvider fileBackedProvider =
        new io.mantisrx.server.master.store.SimpleCachedFileStorageProvider();
    storageProvider = new MantisStorageProviderAdapter(fileBackedProvider, eventPublisher);
    jobStore = new MantisJobStore(storageProvider);
}
Also used : MantisStorageProviderAdapter(io.mantisrx.server.master.persistence.MantisStorageProviderAdapter) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) BeforeClass(org.junit.BeforeClass)

Example 7 with MantisStorageProviderAdapter

use of io.mantisrx.server.master.persistence.MantisStorageProviderAdapter in project mantis by Netflix.

In class JobClusterManagerTest, method testBootstrapJobClusterAndJobsWithCorruptedWorkerPorts.

/**
 * Case for a master leader re-election when a new master re-hydrates corrupted job worker metadata.
 *
 * <p>Scenario exercised by this test:
 * <ol>
 *   <li>Start a job clusters manager actor over a spied {@code MantisJobStore}, create a job
 *       cluster and submit one job.</li>
 *   <li>Drive worker number 1 through launched / start-initiated / heartbeat events.</li>
 *   <li>Overwrite the stored worker metadata with a copy whose worker ports are {@code null}.</li>
 *   <li>Stop the actor, create a fresh one on the same store and initialise it with the
 *       second {@code JobClustersManagerInitialize} argument set to {@code true}.</li>
 *   <li>Assert that the cluster's migration config matches what was created, that the job is in
 *       {@code Launched} state, and that the worker at stage 1 / index 0 now has non-null ports,
 *       worker number 11 and a resubmit count of 1.</li>
 *   <li>Verify the scheduler and job-store interactions.</li>
 * </ol>
 *
 * @throws IOException if one of the job store calls fails
 * @throws io.mantisrx.server.master.persistence.exceptions.InvalidJobException declared by the job
 *         store API used in this test
 */
@Test
public void testBootstrapJobClusterAndJobsWithCorruptedWorkerPorts() throws IOException, io.mantisrx.server.master.persistence.exceptions.InvalidJobException {
    TestKit probe = new TestKit(system);
    JobTestHelper.deleteAllFiles();
    MantisJobStore jobStore = new MantisJobStore(new MantisStorageProviderAdapter(new io.mantisrx.server.master.store.SimpleCachedFileStorageProvider(), eventPublisher));
    MantisJobStore jobStoreSpied = Mockito.spy(jobStore);
    MantisScheduler schedulerMock = mock(MantisScheduler.class);
    ActorRef jobClusterManagerActor = system.actorOf(JobClustersManagerActor.props(jobStoreSpied, eventPublisher));
    jobClusterManagerActor.tell(new JobClusterManagerProto.JobClustersManagerInitialize(schedulerMock, false), probe.getRef());
    probe.expectMsgClass(Duration.of(10, ChronoUnit.MINUTES), JobClustersManagerInitializeResponse.class);

    String jobClusterName = "testBootStrapJobClustersAndJobs1";
    WorkerMigrationConfig migrationConfig = new WorkerMigrationConfig(MigrationStrategyEnum.PERCENTAGE, "{\"percentToMove\":60, \"intervalMs\":30000}");
    createJobClusterAndAssert(jobClusterManagerActor, jobClusterName, migrationConfig);
    submitJobAndAssert(jobClusterManagerActor, jobClusterName);

    String jobId = "testBootStrapJobClustersAndJobs1-1";
    WorkerId workerId = new WorkerId(jobId, 0, 1);
    WorkerEvent launchedEvent = new WorkerLaunched(workerId, 0, "host1", "vm1", empty(), new WorkerPorts(Lists.newArrayList(8000, 9000, 9010, 9020, 9030)));
    jobClusterManagerActor.tell(launchedEvent, probe.getRef());
    WorkerEvent startInitEvent = new WorkerStatus(new Status(workerId.getJobId(), 1, workerId.getWorkerIndex(), workerId.getWorkerNum(), TYPE.INFO, "test START_INIT", MantisJobState.StartInitiated));
    jobClusterManagerActor.tell(startInitEvent, probe.getRef());
    WorkerEvent heartBeat = new WorkerHeartbeat(new Status(jobId, 1, workerId.getWorkerIndex(), workerId.getWorkerNum(), TYPE.HEARTBEAT, "", MantisJobState.Started));
    jobClusterManagerActor.tell(heartBeat, probe.getRef());

    // get Job status
    jobClusterManagerActor.tell(new GetJobDetailsRequest("user", JobId.fromId(jobId).get()), probe.getRef());
    GetJobDetailsResponse resp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
    // Ensure its launched
    assertEquals(SUCCESS, resp2.responseCode);

    // Corrupt the persisted worker: same identity as worker number 1, but with null worker ports.
    JobWorker worker = new JobWorker.Builder().withWorkerIndex(0).withWorkerNumber(1).withJobId(jobId).withStageNum(1).withNumberOfPorts(5).withWorkerPorts(null).withState(WorkerState.Started).withLifecycleEventsPublisher(eventPublisher).build();
    jobStoreSpied.updateWorker(worker.getMetadata());

    // Stop job cluster Manager Actor
    system.stop(jobClusterManagerActor);
    // create new instance
    jobClusterManagerActor = system.actorOf(JobClustersManagerActor.props(jobStoreSpied, eventPublisher));
    // initialize it
    jobClusterManagerActor.tell(new JobClusterManagerProto.JobClustersManagerInitialize(schedulerMock, true), probe.getRef());
    JobClustersManagerInitializeResponse initializeResponse = probe.expectMsgClass(JobClustersManagerInitializeResponse.class);
    assertEquals(SUCCESS, initializeResponse.responseCode);

    WorkerId newWorkerId = new WorkerId(jobId, 0, 11);
    launchedEvent = new WorkerLaunched(newWorkerId, 0, "host1", "vm1", empty(), new WorkerPorts(Lists.newArrayList(8000, 9000, 9010, 9020, 9030)));
    jobClusterManagerActor.tell(launchedEvent, probe.getRef());

    // Get Cluster Config
    jobClusterManagerActor.tell(new GetJobClusterRequest(jobClusterName), probe.getRef());
    GetJobClusterResponse clusterResponse = probe.expectMsgClass(GetJobClusterResponse.class);
    assertEquals(SUCCESS, clusterResponse.responseCode);
    assertTrue(clusterResponse.getJobCluster().isPresent());
    WorkerMigrationConfig mConfig = clusterResponse.getJobCluster().get().getMigrationConfig();
    assertEquals(migrationConfig.getStrategy(), mConfig.getStrategy());
    // Compare against the re-hydrated config (mConfig), not the expected value against itself.
    assertEquals(migrationConfig.getConfigString(), mConfig.getConfigString());

    // get Job status
    jobClusterManagerActor.tell(new GetJobDetailsRequest("user", JobId.fromId(jobId).get()), probe.getRef());
    resp2 = probe.expectMsgClass(GetJobDetailsResponse.class);
    // Ensure its launched
    assertEquals(SUCCESS, resp2.responseCode);
    assertEquals(JobState.Launched, resp2.getJobMetadata().get().getState());
    IMantisWorkerMetadata mantisWorkerMetadata = resp2.getJobMetadata().get().getWorkerByIndex(1, 0).get().getMetadata();
    assertNotNull(mantisWorkerMetadata.getWorkerPorts());
    assertEquals(11, mantisWorkerMetadata.getWorkerNumber());
    assertEquals(1, mantisWorkerMetadata.getTotalResubmitCount());

    jobClusterManagerActor.tell(new GetLastSubmittedJobIdStreamRequest(jobClusterName), probe.getRef());
    GetLastSubmittedJobIdStreamResponse lastSubmittedJobIdStreamResponse = probe.expectMsgClass(Duration.of(10, ChronoUnit.MINUTES), GetLastSubmittedJobIdStreamResponse.class);
    lastSubmittedJobIdStreamResponse.getjobIdBehaviorSubject().get().take(1).toBlocking().subscribe((jId) -> {
        assertEquals(new JobId(jobClusterName, 1), jId);
    });

    // Two schedules: one for the initial success, one for a resubmit from corrupted worker ports.
    verify(schedulerMock, times(2)).scheduleWorker(any());
    // One unschedule from corrupted worker ID 1 (before the resubmit).
    verify(schedulerMock, times(1)).unscheduleAndTerminateWorker(eq(workerId), any());

    // The method already declares IOException, so any failure here propagates to the test
    // runner with its full stack trace instead of being printed and converted to a bare fail().
    Mockito.verify(jobStoreSpied).loadAllArchivedJobsAsync();
    Mockito.verify(jobStoreSpied).loadAllActiveJobs();
    Mockito.verify(jobStoreSpied).loadAllCompletedJobs();
    Mockito.verify(jobStoreSpied).archiveWorker(any());
}
Also used : GetJobClusterResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterResponse) GetLastSubmittedJobIdStreamResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLastSubmittedJobIdStreamResponse) ActorRef(akka.actor.ActorRef) GetJobDetailsRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsRequest) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) WorkerMigrationConfig(io.mantisrx.runtime.WorkerMigrationConfig) GetJobDetailsResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobDetailsResponse) WorkerHeartbeat(io.mantisrx.master.jobcluster.job.worker.WorkerHeartbeat) WorkerEvent(io.mantisrx.server.master.scheduler.WorkerEvent) WorkerStatus(io.mantisrx.master.jobcluster.job.worker.WorkerStatus) IMantisWorkerMetadata(io.mantisrx.master.jobcluster.job.worker.IMantisWorkerMetadata) GetJobClusterRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterRequest) WorkerLaunched(io.mantisrx.server.master.scheduler.WorkerLaunched) JobId(io.mantisrx.server.master.domain.JobId) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) JobClustersManagerInitializeResponse(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.JobClustersManagerInitializeResponse) Status(io.mantisrx.server.core.Status) WorkerStatus(io.mantisrx.master.jobcluster.job.worker.WorkerStatus) TestKit(akka.testkit.javadsl.TestKit) IOException(java.io.IOException) WorkerId(io.mantisrx.server.core.domain.WorkerId) JobWorker(io.mantisrx.master.jobcluster.job.worker.JobWorker) MantisStorageProviderAdapter(io.mantisrx.server.master.persistence.MantisStorageProviderAdapter) GetLastSubmittedJobIdStreamRequest(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetLastSubmittedJobIdStreamRequest) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) WorkerPorts(io.mantisrx.common.WorkerPorts) Test(org.junit.Test)

Aggregations

MantisJobStore (io.mantisrx.server.master.persistence.MantisJobStore)7 MantisStorageProviderAdapter (io.mantisrx.server.master.persistence.MantisStorageProviderAdapter)7 ActorRef (akka.actor.ActorRef)5 JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)5 TestKit (akka.testkit.javadsl.TestKit)4 JobClustersManagerInitializeResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.JobClustersManagerInitializeResponse)4 MantisScheduler (io.mantisrx.server.master.scheduler.MantisScheduler)4 IOException (java.io.IOException)4 Test (org.junit.Test)4 GetJobClusterRequest (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterRequest)3 GetJobClusterResponse (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto.GetJobClusterResponse)3 WorkerMigrationConfig (io.mantisrx.runtime.WorkerMigrationConfig)3 WorkerPorts (io.mantisrx.common.WorkerPorts)2 NotUsed (akka.NotUsed)1 ActorSystem (akka.actor.ActorSystem)1 ConnectHttp (akka.http.javadsl.ConnectHttp)1 Http (akka.http.javadsl.Http)1 ServerBinding (akka.http.javadsl.ServerBinding)1 HttpRequest (akka.http.javadsl.model.HttpRequest)1 HttpResponse (akka.http.javadsl.model.HttpResponse)1