Search in sources :

Example 1 with LaunchTaskRequest

use of io.mantisrx.server.master.scheduler.LaunchTaskRequest in project mantis by Netflix.

In the class SchedulingService, the method launchTasks.

/**
 * Tries to launch one task per assignment result, using the supplied Mesos leases.
 *
 * For every assignment result the following steps are taken:
 *
 * 1. A {@link WorkerPorts} is built from the assigned ports.
 *      - If its preconditions fail ({@link IllegalArgumentException} or {@link IllegalStateException}),
 *        the problem is logged, the missing-worker-ports counter is incremented and the worker is
 *        unscheduled. No launch task request is queued for that assignment result.
 * 2. Otherwise a {@link WorkerLaunched} event is routed through the job message router.
 *      - If routing succeeds, a {@link LaunchTaskRequest} is queued.
 *      - If routing fails, the worker is unscheduled.
 *
 * Host name, VM id and cluster attribute are always read from the first lease in {@code leases}.
 *
 * After all assignment results have been processed:
 *
 * 1. If no launch task request was queued, every lease is rejected.
 * 2. The virtual machine service is asked to launch the queued requests (possibly none) on the leases.
 * 3. For each request the service reports an error for, a {@link WorkerLaunchFailed} event is routed;
 *    a warning is logged if that event cannot be routed.
 *
 * @param requests collection of assignment results received by the scheduler.
 * @param leases list of resource offers from Mesos; must be non-empty whenever {@code requests} is non-empty,
 *               because the first lease is read for every assignment result.
 */
private void launchTasks(Collection<TaskAssignmentResult> requests, List<VirtualMachineLease> leases) {
    final List<LaunchTaskRequest> pendingLaunches = new ArrayList<>();

    for (TaskAssignmentResult assignment : requests) {
        final ScheduleRequest scheduleRequest = (ScheduleRequest) assignment.getRequest();

        WorkerPorts ports;
        try {
            ports = new WorkerPorts(assignment.getAssignedPorts());
        } catch (IllegalArgumentException | IllegalStateException e) {
            logger.error("problem launching tasks for assignment result {}: {}", assignment, e);
            numMissingWorkerPorts.increment();
            ports = null;
        }

        // Only the first lease is consulted for host details.
        final VirtualMachineLease firstLease = leases.get(0);

        // The event is routed only when the ports could be constructed.
        final boolean routed = ports != null
            && jobMessageRouter.routeWorkerEvent(
                new WorkerLaunched(
                    scheduleRequest.getWorkerId(),
                    scheduleRequest.getStageNum(),
                    firstLease.hostname(),
                    firstLease.getVMID(),
                    getAttribute(firstLease, slaveClusterAttributeName),
                    ports));

        if (!routed) {
            // Either the ports were invalid or the WorkerLaunched event could not be delivered.
            unscheduleWorker(scheduleRequest.getWorkerId(), Optional.ofNullable(firstLease.hostname()));
            continue;
        }

        pendingLaunches.add(new LaunchTaskRequest(scheduleRequest, ports));
    }

    if (pendingLaunches.isEmpty()) {
        for (VirtualMachineLease lease : leases) {
            virtualMachineService.rejectLease(lease);
        }
    }

    // Invoked even when nothing is pending; the returned map is keyed by the failed schedule requests.
    final Map<ScheduleRequest, LaunchTaskException> failures =
        virtualMachineService.launchTasks(pendingLaunches, leases);

    for (TaskAssignmentResult assignment : requests) {
        final ScheduleRequest scheduleRequest = (ScheduleRequest) assignment.getRequest();
        if (!failures.containsKey(scheduleRequest)) {
            continue;
        }

        final String errorMessage =
            getWorkerStringPrefix(scheduleRequest.getStageNum(), scheduleRequest.getWorkerId())
                + " failed due to "
                + failures.get(scheduleRequest).getMessage();

        final boolean delivered = jobMessageRouter.routeWorkerEvent(
            new WorkerLaunchFailed(scheduleRequest.getWorkerId(), scheduleRequest.getStageNum(), errorMessage));
        if (!delivered) {
            logger.warn("Failed to route WorkerLaunchFailed for {} (err {})", scheduleRequest.getWorkerId(), errorMessage);
        }
    }
}
Also used : ScheduleRequest(io.mantisrx.server.master.scheduler.ScheduleRequest) LaunchTaskRequest(io.mantisrx.server.master.scheduler.LaunchTaskRequest) ArrayList(java.util.ArrayList) VirtualMachineLease(com.netflix.fenzo.VirtualMachineLease) WorkerPorts(io.mantisrx.common.WorkerPorts) TaskAssignmentResult(com.netflix.fenzo.TaskAssignmentResult) WorkerLaunchFailed(io.mantisrx.server.master.scheduler.WorkerLaunchFailed) WorkerLaunched(io.mantisrx.server.master.scheduler.WorkerLaunched)

Example 2 with LaunchTaskRequest

use of io.mantisrx.server.master.scheduler.LaunchTaskRequest in project mantis by Netflix.

In the class VirtualMachineMasterServiceMesosImpl, the method launchTasks.

/**
 * Builds Mesos task infos for the given launch requests and submits them against the given leases.
 *
 * NOTE: All leases are for the same agent; the slave id is taken from the first lease only.
 *
 * When this service is not initialized (not in leader mode), nothing is launched and an empty map
 * is returned. Otherwise, each request whose task info cannot be created is recorded in the returned
 * map together with its {@link LaunchTaskException}. If at least one task info was created, all of
 * them are launched against the offers of every lease; if none was created, every offer is declined.
 *
 * @param requests launch task requests to turn into task infos.
 * @param leases leases whose offers are used for the launch; the first element is read
 *               unconditionally once the service is initialized.
 * @return map from each schedule request that failed task-info creation to the exception raised for it.
 */
@Override
public Map<ScheduleRequest, LaunchTaskException> launchTasks(List<LaunchTaskRequest> requests, List<VirtualMachineLease> leases) {
    if (!super.getIsInited()) {
        logger.error("Not in leader mode, not launching tasks");
        return new HashMap<>();
    }

    final Protos.SlaveID agentId = leases.get(0).getOffer().getSlaveId();

    final List<Protos.OfferID> offerIds = new ArrayList<>();
    leases.forEach(lease -> offerIds.add(lease.getOffer().getId()));

    final Map<ScheduleRequest, LaunchTaskException> failures = new HashMap<>();
    final List<TaskInfo> tasksToLaunch = new ArrayList<>();
    for (LaunchTaskRequest launchRequest : requests) {
        try {
            tasksToLaunch.addAll(createTaskInfo(agentId, launchRequest));
        } catch (LaunchTaskException e) {
            failures.put(launchRequest.getScheduleRequest(), e);
        }
    }

    if (tasksToLaunch.isEmpty()) {
        // reject offers to prevent offer leak, but shouldn't happen
        for (VirtualMachineLease lease : leases) {
            mesosDriver.get().declineOffer(lease.getOffer().getId());
        }
    } else {
        mesosDriver.get().launchTasks(offerIds, tasksToLaunch);
    }

    return failures;
}
Also used : ScheduleRequest(io.mantisrx.server.master.scheduler.ScheduleRequest) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LaunchTaskRequest(io.mantisrx.server.master.scheduler.LaunchTaskRequest) LaunchTaskException(io.mantisrx.server.master.LaunchTaskException) VirtualMachineLease(com.netflix.fenzo.VirtualMachineLease) TaskInfo(org.apache.mesos.Protos.TaskInfo) Protos(org.apache.mesos.Protos)

Aggregations

VirtualMachineLease (com.netflix.fenzo.VirtualMachineLease)2 LaunchTaskRequest (io.mantisrx.server.master.scheduler.LaunchTaskRequest)2 ScheduleRequest (io.mantisrx.server.master.scheduler.ScheduleRequest)2 ArrayList (java.util.ArrayList)2 TaskAssignmentResult (com.netflix.fenzo.TaskAssignmentResult)1 WorkerPorts (io.mantisrx.common.WorkerPorts)1 LaunchTaskException (io.mantisrx.server.master.LaunchTaskException)1 WorkerLaunchFailed (io.mantisrx.server.master.scheduler.WorkerLaunchFailed)1 WorkerLaunched (io.mantisrx.server.master.scheduler.WorkerLaunched)1 HashMap (java.util.HashMap)1 Protos (org.apache.mesos.Protos)1 TaskInfo (org.apache.mesos.Protos.TaskInfo)1