Search in sources :

Example 1 with WorkerLaunchFailed

Use of io.mantisrx.server.master.scheduler.WorkerLaunchFailed in the Mantis project by Netflix.

The launchTasks method of the SchedulingService class.

/**
 * Tries to launch one task per assignment result, using the supplied Mesos leases.
 *
 * For each assignment result, in order:
 *
 * 1. A {@link WorkerPorts} is built from the assigned ports.
 * 2. A {@link WorkerLaunched} event is routed to the corresponding actor. The host name, VM id and
 *    cluster attribute in that event are always taken from the first lease in {@code leases}.
 * 3. If the event was routed, a launch task request is queued for the underlying Mesos driver.
 *
 * The worker is unscheduled, and no launch task request is queued for it, when either:
 *
 * 1. {@link WorkerPorts} rejects the assigned ports (its preconditions throw). The failure is logged
 *    and counted in {@code numMissingWorkerPorts}.
 * 2. The {@link WorkerLaunched} event could not be routed.
 *
 * If no launch task request was queued at all, every lease is rejected. The (then empty) list of
 * requests is still handed to the virtual machine service; eventually the underlying Mesos driver
 * declines the offers since there is nothing to launch.
 *
 * Finally, every request for which the virtual machine service reported a launch error gets a
 * {@link WorkerLaunchFailed} event; a warning is logged if that event cannot be routed.
 *
 * @param requests collection of assignment results received by the scheduler.
 * @param leases list of resource offers from Mesos; its first element is read once per assignment result.
 */
private void launchTasks(Collection<TaskAssignmentResult> requests, List<VirtualMachineLease> leases) {
    final List<LaunchTaskRequest> toLaunch = new ArrayList<>();

    for (TaskAssignmentResult assignment : requests) {
        final ScheduleRequest scheduleRequest = (ScheduleRequest) assignment.getRequest();

        // A null value marks "ports could not be built" for the check below.
        WorkerPorts ports;
        try {
            ports = new WorkerPorts(assignment.getAssignedPorts());
        } catch (IllegalArgumentException | IllegalStateException e) {
            logger.error("problem launching tasks for assignment result {}: {}", assignment, e);
            numMissingWorkerPorts.increment();
            ports = null;
        }

        // Every assignment in this batch is described using the first lease.
        final VirtualMachineLease firstLease = leases.get(0);

        // Short-circuit: the event is only built and routed when the ports are usable.
        final boolean launchedEventRouted = ports != null
                && jobMessageRouter.routeWorkerEvent(new WorkerLaunched(
                        scheduleRequest.getWorkerId(),
                        scheduleRequest.getStageNum(),
                        firstLease.hostname(),
                        firstLease.getVMID(),
                        getAttribute(firstLease, slaveClusterAttributeName),
                        ports));

        if (!launchedEventRouted) {
            // Either the ports were missing or the actor could not be notified: give the worker back.
            unscheduleWorker(scheduleRequest.getWorkerId(), Optional.ofNullable(firstLease.hostname()));
            continue;
        }

        toLaunch.add(new LaunchTaskRequest(scheduleRequest, ports));
    }

    // Nothing to launch: hand all leases back before making the (empty) launch call.
    if (toLaunch.isEmpty()) {
        for (VirtualMachineLease unusedLease : leases) {
            virtualMachineService.rejectLease(unusedLease);
        }
    }

    final Map<ScheduleRequest, LaunchTaskException> launchErrors =
            virtualMachineService.launchTasks(toLaunch, leases);

    // Report each launch error back to the job through a WorkerLaunchFailed event.
    for (TaskAssignmentResult assignment : requests) {
        final ScheduleRequest scheduleRequest = (ScheduleRequest) assignment.getRequest();
        if (!launchErrors.containsKey(scheduleRequest)) {
            continue;
        }

        final String errorMessage =
                getWorkerStringPrefix(scheduleRequest.getStageNum(), scheduleRequest.getWorkerId())
                        + " failed due to "
                        + launchErrors.get(scheduleRequest).getMessage();

        final boolean failureEventRouted = jobMessageRouter.routeWorkerEvent(
                new WorkerLaunchFailed(scheduleRequest.getWorkerId(), scheduleRequest.getStageNum(), errorMessage));
        if (!failureEventRouted) {
            logger.warn("Failed to route WorkerLaunchFailed for {} (err {})", scheduleRequest.getWorkerId(), errorMessage);
        }
    }
}
Also used : ScheduleRequest(io.mantisrx.server.master.scheduler.ScheduleRequest) LaunchTaskRequest(io.mantisrx.server.master.scheduler.LaunchTaskRequest) ArrayList(java.util.ArrayList) VirtualMachineLease(com.netflix.fenzo.VirtualMachineLease) WorkerPorts(io.mantisrx.common.WorkerPorts) TaskAssignmentResult(com.netflix.fenzo.TaskAssignmentResult) WorkerLaunchFailed(io.mantisrx.server.master.scheduler.WorkerLaunchFailed) WorkerLaunched(io.mantisrx.server.master.scheduler.WorkerLaunched)

Aggregations

TaskAssignmentResult (com.netflix.fenzo.TaskAssignmentResult)1 VirtualMachineLease (com.netflix.fenzo.VirtualMachineLease)1 WorkerPorts (io.mantisrx.common.WorkerPorts)1 LaunchTaskRequest (io.mantisrx.server.master.scheduler.LaunchTaskRequest)1 ScheduleRequest (io.mantisrx.server.master.scheduler.ScheduleRequest)1 WorkerLaunchFailed (io.mantisrx.server.master.scheduler.WorkerLaunchFailed)1 WorkerLaunched (io.mantisrx.server.master.scheduler.WorkerLaunched)1 ArrayList (java.util.ArrayList)1