Search in sources :

Example 1 with WorkerOnDisabledVM

use of io.mantisrx.server.master.scheduler.WorkerOnDisabledVM in project mantis by Netflix.

The method manageActiveVMs of the class AgentClusterOperationsImpl.

/**
 * Collects the hostnames of VMs that carry running tasks but are not marked active, and
 * routes a {@link WorkerOnDisabledVM} event for every worker found on such a VM.
 *
 * <p>A VM is only examined when it has a current lease and at least one running task. Its
 * {@code attrName} attribute text is then checked against the list returned by
 * {@code getActiveVMsAttributeValues()} via {@code isIn}; when that check is negative the VM
 * is reported as inactive. VMs without attributes, or without a text value for
 * {@code attrName}, are logged at warn level and skipped.
 *
 * @param currentStates current state of each VM; may be {@code null} or empty
 * @return hostnames of the VMs considered inactive; empty when {@code currentStates} is
 *         {@code null}/empty or when no active-VM attribute values are configured (in which
 *         case the returned list is immutable)
 */
List<String> manageActiveVMs(final List<VirtualMachineCurrentState> currentStates) {
    final List<String> inactiveVMs = new ArrayList<>();
    if (currentStates == null || currentStates.isEmpty()) {
        return inactiveVMs;
    }
    final List<String> values = getActiveVMsAttributeValues();
    if (values == null || values.isEmpty()) {
        // treat no valid active VMs attribute value as all are active
        return Collections.emptyList();
    }
    for (VirtualMachineCurrentState currentState : currentStates) {
        final VirtualMachineLease lease = currentState.getCurrAvailableResources();
        if (lease == null) {
            continue;
        }
        final Collection<TaskRequest> runningTasks = currentState.getRunningTasks();
        if (runningTasks == null || runningTasks.isEmpty()) {
            // nothing runs on this VM, so there is no worker to notify
            continue;
        }
        final Map<String, Protos.Attribute> attributeMap = lease.getAttributeMap();
        if (attributeMap == null || attributeMap.isEmpty()) {
            logger.warn("No attributes found on VM " + lease.hostname() + " that has " + runningTasks.size() + " tasks on it");
            continue;
        }
        final Protos.Attribute attribute = attributeMap.get(attrName);
        if (attribute == null || !attribute.hasText()) {
            logger.warn("No attribute value for " + attrName + " found on VM " + lease.hostname() + " that has " + runningTasks.size() + " tasks on it");
            continue;
        }
        if (isIn(attribute.getText().getValue(), values)) {
            continue;
        }
        inactiveVMs.add(lease.hostname());
        for (TaskRequest task : runningTasks) {
            // task ids that do not parse into a WorkerId are silently ignored
            WorkerId.fromId(task.getId())
                    .ifPresent(workerId -> jobMessageRouter.routeWorkerEvent(new WorkerOnDisabledVM(workerId)));
        }
    }
    return inactiveVMs;
}
Also used : ArrayList(java.util.ArrayList) VirtualMachineCurrentState(com.netflix.fenzo.VirtualMachineCurrentState) TaskRequest(com.netflix.fenzo.TaskRequest) VirtualMachineLease(com.netflix.fenzo.VirtualMachineLease) WorkerId(io.mantisrx.server.core.domain.WorkerId) Protos(org.apache.mesos.Protos) WorkerOnDisabledVM(io.mantisrx.server.master.scheduler.WorkerOnDisabledVM)

Example 2 with WorkerOnDisabledVM

use of io.mantisrx.server.master.scheduler.WorkerOnDisabledVM in project mantis by Netflix.

The method testWorkerMigration of the class JobTestMigrationTests.

/**
 * Drives a single-worker job through launch, then sends a {@link WorkerOnDisabledVM} message
 * followed by a heartbeat check, and finally reports the replacement worker as started.
 *
 * <p>NOTE(review): the results of the two latch {@code await} calls are not asserted, and the
 * mock-based {@code verify(...)} checks the original author sketched (2 schedule calls, 1
 * unschedule call) were left disabled. As written, the test only fails on an unexpected
 * exception or on one of the response-code/state assertions.
 */
@Test
public void testWorkerMigration() {
    final String clusterName = "testWorkerMigration";
    final TestKit probe = new TestKit(system);
    final SchedulingInfo sInfo = new SchedulingInfo.Builder()
            .numberOfStages(1)
            .singleWorkerStageWithConstraints(new MachineDefinition(1.0, 1.0, 1.0, 3), Lists.newArrayList(), Lists.newArrayList())
            .build();
    final IJobClusterDefinition jobClusterDefn = JobTestHelper.generateJobClusterDefinition(
            clusterName, sInfo, new WorkerMigrationConfig(MigrationStrategyEnum.ONE_WORKER, "{}"));
    // latch counts mirror the intended expectation: 1 original submission + 1 resubmit, 1 kill
    final CountDownLatch scheduleCDL = new CountDownLatch(2);
    final CountDownLatch unscheduleCDL = new CountDownLatch(1);
    try {
        final JobDefinition jobDefn = JobTestHelper.generateJobDefinition(clusterName, sInfo);
        final MantisScheduler schedulerMock = new DummyScheduler(scheduleCDL, unscheduleCDL);
        final MantisJobStore jobStoreMock = mock(MantisJobStore.class);
        final MantisJobMetadataImpl mantisJobMetaData = new MantisJobMetadataImpl.Builder()
                .withJobId(new JobId(clusterName, 2))
                .withSubmittedAt(Instant.now())
                .withJobState(JobState.Accepted)
                .withNextWorkerNumToUse(1)
                .withJobDefinition(jobDefn)
                .build();
        final ActorRef jobActor = system.actorOf(
                JobActor.props(jobClusterDefn, mantisJobMetaData, jobStoreMock, schedulerMock, eventPublisher));

        jobActor.tell(new JobProto.InitJob(probe.getRef()), probe.getRef());
        final JobProto.JobInitialized initMsg = probe.expectMsgClass(JobProto.JobInitialized.class);
        assertEquals(SUCCESS, initMsg.responseCode);

        final String jobId = clusterName + "-2";
        final int stageNo = 1;
        final WorkerId workerId = new WorkerId(jobId, 0, 1);
        // send Launched, Initiated and heartbeat
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId, stageNo, workerId);

        // check job status again
        jobActor.tell(new JobClusterManagerProto.GetJobDetailsRequest("nj", jobId), probe.getRef());
        final JobClusterManagerProto.GetJobDetailsResponse resp3 =
                probe.expectMsgClass(JobClusterManagerProto.GetJobDetailsResponse.class);
        assertEquals(SUCCESS, resp3.responseCode);
        // the worker has started, so the job is expected to be in the Launched state
        assertEquals(JobState.Launched, resp3.getJobMetadata().get().getState());

        // Send migrate worker message
        jobActor.tell(new WorkerOnDisabledVM(workerId), probe.getRef());
        // Trigger check hb status and that should start the migration. And migrate first worker
        jobActor.tell(new JobProto.CheckHeartBeat(), probe.getRef());

        // send HB for the migrated worker
        final WorkerId migratedWorkerId1 = new WorkerId(jobId, 0, 2);
        JobTestHelper.sendLaunchedInitiatedStartedEventsToWorker(probe, jobActor, jobId, stageNo, migratedWorkerId1);

        // give the scheduler up to one second each to observe the schedule/unschedule calls
        scheduleCDL.await(1, TimeUnit.SECONDS);
        unscheduleCDL.await(1, TimeUnit.SECONDS);
    } catch (Exception e) {
        // Surface the cause in the test report instead of printing it and failing blindly.
        throw new AssertionError("testWorkerMigration failed with an unexpected exception", e);
    }
}
Also used : ActorRef(akka.actor.ActorRef) JobProto(io.mantisrx.master.jobcluster.proto.JobProto) WorkerMigrationConfig(io.mantisrx.runtime.WorkerMigrationConfig) MantisScheduler(io.mantisrx.server.master.scheduler.MantisScheduler) WorkerOnDisabledVM(io.mantisrx.server.master.scheduler.WorkerOnDisabledVM) JobDefinition(io.mantisrx.server.master.domain.JobDefinition) JobId(io.mantisrx.server.master.domain.JobId) JobClusterManagerProto(io.mantisrx.master.jobcluster.proto.JobClusterManagerProto) SchedulingInfo(io.mantisrx.runtime.descriptor.SchedulingInfo) MachineDefinition(io.mantisrx.runtime.MachineDefinition) Instant(java.time.Instant) TestKit(akka.testkit.javadsl.TestKit) CountDownLatch(java.util.concurrent.CountDownLatch) WorkerId(io.mantisrx.server.core.domain.WorkerId) InvalidJobException(io.mantisrx.runtime.command.InvalidJobException) MantisJobStore(io.mantisrx.server.master.persistence.MantisJobStore) IJobClusterDefinition(io.mantisrx.server.master.domain.IJobClusterDefinition) InvalidJobException(io.mantisrx.runtime.command.InvalidJobException) Test(org.junit.Test)

Aggregations

WorkerId (io.mantisrx.server.core.domain.WorkerId)2 WorkerOnDisabledVM (io.mantisrx.server.master.scheduler.WorkerOnDisabledVM)2 ActorRef (akka.actor.ActorRef)1 TestKit (akka.testkit.javadsl.TestKit)1 TaskRequest (com.netflix.fenzo.TaskRequest)1 VirtualMachineCurrentState (com.netflix.fenzo.VirtualMachineCurrentState)1 VirtualMachineLease (com.netflix.fenzo.VirtualMachineLease)1 JobClusterManagerProto (io.mantisrx.master.jobcluster.proto.JobClusterManagerProto)1 JobProto (io.mantisrx.master.jobcluster.proto.JobProto)1 MachineDefinition (io.mantisrx.runtime.MachineDefinition)1 WorkerMigrationConfig (io.mantisrx.runtime.WorkerMigrationConfig)1 InvalidJobException (io.mantisrx.runtime.command.InvalidJobException)1 SchedulingInfo (io.mantisrx.runtime.descriptor.SchedulingInfo)1 IJobClusterDefinition (io.mantisrx.server.master.domain.IJobClusterDefinition)1 JobDefinition (io.mantisrx.server.master.domain.JobDefinition)1 JobId (io.mantisrx.server.master.domain.JobId)1 MantisJobStore (io.mantisrx.server.master.persistence.MantisJobStore)1 MantisScheduler (io.mantisrx.server.master.scheduler.MantisScheduler)1 Instant (java.time.Instant)1 ArrayList (java.util.ArrayList)1