Search in sources :

Example 6 with WorkerResourceDescription

use of es.bsc.compss.types.resources.WorkerResourceDescription in project compss by bsc-wdc.

the class TaskScheduler method errorOnAction.

/**
 * Handles an error reported for the given action. The action processes its own error first; unless that ends in a
 * complete failure, the scheduler then tries to place the action again. This path is only taken on re-schedule
 * (no resubmit).
 *
 * @param action
 *            action on which the error was raised
 */
@SuppressWarnings("unchecked")
public final void errorOnAction(AllocatableAction action) {
    LOGGER.warn("[TaskScheduler] Error on action " + action);
    // Grab the hosting resource up front: processing the error removes the assignment from the action
    ResourceScheduler<WorkerResourceDescription> resource = (ResourceScheduler<WorkerResourceDescription>) action.getAssignedResource();
    List<AllocatableAction> freedActions = new LinkedList<>();
    boolean completelyFailed = false;

    try {
        action.error();
    } catch (FailedActionException fae) {
        // The action cannot be recovered
        completelyFailed = true;
        LOGGER.warn("[TaskScheduler] Action completely failed " + action);
        removeFromReady(action);
        // Unschedule every action reported by failed() and collect the actions released by doing so
        for (AllocatableAction dependent : action.failed()) {
            try {
                freedActions.addAll(resource.unscheduleAction(dependent));
            } catch (ActionNotFoundException anfe) {
                // Deliberately ignored: the resource does not know the action, so nothing is released
            }
        }
    }

    // Release the erroneous action itself and collect the actions the resource frees
    try {
        freedActions.addAll(resource.unscheduleAction(action));
    } catch (ActionNotFoundException anfe) {
        // Deliberately ignored: the resource does not know the action, so nothing is released
    }
    workerLoadUpdate(resource);

    if (!completelyFailed) {
        // The action may still run: score it and attempt a new scheduling
        Score retryScore = generateActionScore(action);
        try {
            scheduleAction(action, retryScore);
            tryToLaunch(action);
        } catch (BlockedActionException bae) {
            // The action cannot be scheduled: move it from the ready set to the blocked set
            removeFromReady(action);
            addToBlocked(action);
        }
    }

    // No data-free actions on this path; only the resource-free ones are handled
    List<AllocatableAction> noDataFreeActions = new LinkedList<>();
    List<AllocatableAction> blockedCandidates = new LinkedList<>();
    handleDependencyFreeActions(noDataFreeActions, freedActions, blockedCandidates, resource);
    for (AllocatableAction candidate : blockedCandidates) {
        removeFromReady(candidate);
        addToBlocked(candidate);
    }
}
Also used : ActionNotFoundException(es.bsc.compss.scheduler.exceptions.ActionNotFoundException) Score(es.bsc.compss.scheduler.types.Score) BlockedActionException(es.bsc.compss.scheduler.exceptions.BlockedActionException) WorkerResourceDescription(es.bsc.compss.types.resources.WorkerResourceDescription) AllocatableAction(es.bsc.compss.scheduler.types.AllocatableAction) LinkedList(java.util.LinkedList) FailedActionException(es.bsc.compss.scheduler.exceptions.FailedActionException)

Example 7 with WorkerResourceDescription

use of es.bsc.compss.types.resources.WorkerResourceDescription in project compss by bsc-wdc.

the class TaskScheduler method reducedWorkerResources.

/**
 * Reacts to a reduction of the resources of a cloud worker.
 * <p>
 * If the type composition of the worker description is empty, the reduction is simply handed to
 * {@code ResourceManager.terminateCloudResource}. Otherwise the worker is removed from {@code workers}, the
 * profiles of its executable implementations are accumulated into {@code offVMsProfiles}, the removal is
 * notified through {@code workerRemoved}, and a {@code StopWorkerAction} is scheduled on the worker and launched.
 *
 * @param <T>
 *            type of resource description managed by the worker
 * @param worker
 *            resource scheduler of the affected worker; its resource is cast to {@code CloudMethodWorker}
 * @param modification
 *            update describing the reduction
 */
@SuppressWarnings("unchecked")
private <T extends WorkerResourceDescription> void reducedWorkerResources(ResourceScheduler<T> worker, ResourceUpdate<T> modification) {
    CloudMethodWorker cloudWorker = (CloudMethodWorker) worker.getResource();
    if (cloudWorker.getDescription().getTypeComposition().isEmpty()) {
        ResourceManager.terminateCloudResource(cloudWorker, (CloudMethodResourceDescription) modification.getModification());
        return;
    }

    // Same scheduler seen through the base description type expected by the calls below
    ResourceScheduler<WorkerResourceDescription> baseWorker = (ResourceScheduler<WorkerResourceDescription>) worker;
    synchronized (workers) {
        workers.remove(baseWorker.getResource());
        int totalCores = CoreManager.getCoreCount();
        List<Implementation>[] implsPerCore = worker.getExecutableImpls();
        // Fold the profile of each executable implementation into the per-core, per-implementation accumulator
        for (int core = 0; core < totalCores; core++) {
            for (Implementation impl : implsPerCore[core]) {
                Profile implProfile = worker.getProfile(impl);
                if (implProfile == null) {
                    continue;
                }
                offVMsProfiles[core][impl.getImplementationId()].accumulate(implProfile);
            }
        }
    }

    this.workerRemoved(baseWorker);
    StopWorkerAction stopAction = new StopWorkerAction(generateSchedulingInformation(worker), worker, this, modification);
    try {
        stopAction.schedule(baseWorker, (Score) null);
        stopAction.tryToLaunch();
    } catch (BlockedActionException | UnassignedActionException | InvalidSchedulingException e) {
        // Deliberately ignored: the stop action can not be blocked nor unassigned
    }
}
Also used : UnassignedActionException(es.bsc.compss.scheduler.exceptions.UnassignedActionException) CloudMethodWorker(es.bsc.compss.types.resources.CloudMethodWorker) BlockedActionException(es.bsc.compss.scheduler.exceptions.BlockedActionException) WorkerResourceDescription(es.bsc.compss.types.resources.WorkerResourceDescription) InvalidSchedulingException(es.bsc.compss.scheduler.exceptions.InvalidSchedulingException) LinkedList(java.util.LinkedList) List(java.util.List) StopWorkerAction(es.bsc.compss.scheduler.types.allocatableactions.StopWorkerAction) Implementation(es.bsc.compss.types.implementations.Implementation) Profile(es.bsc.compss.scheduler.types.Profile)

Example 8 with WorkerResourceDescription

use of es.bsc.compss.types.resources.WorkerResourceDescription in project compss by bsc-wdc.

the class TaskScheduler method actionCompleted.

/**
 * Marks the given action as completed, unschedules it from its assigned resource and handles the actions that
 * become free as a consequence (both resource-free and data-free ones).
 *
 * @param action
 *            action that has finished
 */
@SuppressWarnings("unchecked")
public final void actionCompleted(AllocatableAction action) {
    LOGGER.info("[TaskScheduler] Action completed " + action);
    // A finished action is no longer ready
    removeFromReady(action);

    // Unschedule the action from its resource and keep the actions released by doing so
    ResourceScheduler<WorkerResourceDescription> hostResource = (ResourceScheduler<WorkerResourceDescription>) action.getAssignedResource();
    List<AllocatableAction> resourceFreed;
    try {
        resourceFreed = hostResource.unscheduleAction(action);
    } catch (ActionNotFoundException anfe) {
        // The resource does not know the action: nothing is released on the resource side
        resourceFreed = new LinkedList<>();
    }

    // Every action released by the completion becomes ready
    List<AllocatableAction> dataFreed = action.completed();
    for (AllocatableAction released : dataFreed) {
        addToReady(released);
    }

    // Refresh the load of the worker
    workerLoadUpdate(hostResource);

    // Handle the freed actions; those that remain blocked are reported back in blockedCandidates
    List<AllocatableAction> blockedCandidates = new LinkedList<>();
    handleDependencyFreeActions(dataFreed, resourceFreed, blockedCandidates, hostResource);
    for (AllocatableAction candidate : blockedCandidates) {
        removeFromReady(candidate);
        addToBlocked(candidate);
    }
}
Also used : ActionNotFoundException(es.bsc.compss.scheduler.exceptions.ActionNotFoundException) WorkerResourceDescription(es.bsc.compss.types.resources.WorkerResourceDescription) AllocatableAction(es.bsc.compss.scheduler.types.AllocatableAction) LinkedList(java.util.LinkedList)

Example 9 with WorkerResourceDescription

use of es.bsc.compss.types.resources.WorkerResourceDescription in project compss by bsc-wdc.

the class MOResourceScheduler method localOptimization.

/*--------------------------------------------------
     ---------------------------------------------------
     -------------- Optimization Methods ---------------
     ---------------------------------------------------
     --------------------------------------------------*/
/**
 * Runs a local optimization pass over the actions of this resource and replaces its gap list with the gaps
 * produced by the rescheduling.
 * <p>
 * While the pass is running, {@code opAction} holds a fresh {@code OptimizationAction}. At the end of the pass
 * every successor registered on it is scheduled using the new gaps and {@code opAction} is reset to
 * {@code null}.
 *
 * @param updateId
 *            identifier handed to the {@code LocalOptimizationState} created for this pass
 * @param selectionComparator
 *            comparator handed to the {@code LocalOptimizationState}
 * @param donorComparator
 *            comparator ordering the returned queue
 * @return queue ordered by {@code donorComparator}; it is passed to {@code rescheduleTasks} (which presumably
 *         fills it -- confirm against that method) and additionally receives every successor of the
 *         optimization action scheduled at the end of the pass
 */
@SuppressWarnings("unchecked")
public PriorityQueue<AllocatableAction> localOptimization(long updateId, Comparator<AllocatableAction> selectionComparator, Comparator<AllocatableAction> donorComparator) {
    LocalOptimizationState state = new LocalOptimizationState(updateId, (MOResourceScheduler<WorkerResourceDescription>) this, getReadyComparator(), selectionComparator);
    PriorityQueue<AllocatableAction> actions = new PriorityQueue<AllocatableAction>(1, donorComparator);
    // Publish the optimization action under the gaps monitor
    synchronized (gaps) {
        opAction = new OptimizationAction();
    }
    // No changes in the Gap structure
    // Scan actions: Filters ready and selectable actions
    LOGGER.debug(LOG_PREFIX + "Scanning current actions");
    List<AllocatableAction> lockedActions = scanActions(state);
    // Gets all the pending schedulings, swapping in a fresh successor list on the optimization action
    List<AllocatableAction> newPendingSchedulings = new LinkedList<>();
    List<AllocatableAction> pendingSchedulings;
    synchronized (gaps) {
        MOSchedulingInformation opDSI = (MOSchedulingInformation) opAction.getSchedulingInfo();
        pendingSchedulings = opDSI.replaceSuccessors(newPendingSchedulings);
    }
    // Classify pending actions: Filters ready and selectable actions
    LOGGER.debug(LOG_PREFIX + "Classify Pending Scheduling/Unscheduling actions");
    classifyPendingSchedulings(pendingSchedulings, state);
    classifyPendingUnschedulings(state);
    // ClassifyActions
    LOGGER.debug(LOG_PREFIX + "Reschedule pending actions");
    List<Gap> newGaps = rescheduleTasks(state, actions);
    // Ensuring there are no locked actions after rescheduling
    for (AllocatableAction action : lockedActions) {
        MOSchedulingInformation actionDSI = (MOSchedulingInformation) action.getSchedulingInfo();
        try {
            actionDSI.unlock();
        } catch (IllegalMonitorStateException e) {
            // Raised by unlock(); treated as harmless here and only logged
            LOGGER.debug(LOG_PREFIX + "Illegal Monitor Exception when releasing locked actions. Ignoring...");
        }
    }
    // Schedules all the pending schedulings and unblocks the scheduling of new actions
    LOGGER.debug(LOG_PREFIX + "Manage new gaps");
    synchronized (gaps) {
        gaps.clear();
        gaps.addAll(newGaps);
        MOSchedulingInformation opDSI = (MOSchedulingInformation) opAction.getSchedulingInfo();
        List<AllocatableAction> successors = opDSI.getSuccessors();
        for (AllocatableAction action : successors) {
            actions.add(action);
            MOSchedulingInformation actionDSI = (MOSchedulingInformation) action.getSchedulingInfo();
            actionDSI.lock();
            try {
                actionDSI.removePredecessor(opAction);
                this.scheduleUsingGaps(action, gaps);
            } finally {
                // Always release the action, even if detaching or scheduling it throws
                actionDSI.unlock();
            }
        }
        opDSI.clearSuccessors();
        opAction = null;
    }
    return actions;
}
Also used : OptimizationAction(es.bsc.compss.scheduler.multiobjective.types.OptimizationAction) LocalOptimizationState(es.bsc.compss.scheduler.multiobjective.types.LocalOptimizationState) Gap(es.bsc.compss.scheduler.multiobjective.types.Gap) WorkerResourceDescription(es.bsc.compss.types.resources.WorkerResourceDescription) AllocatableAction(es.bsc.compss.scheduler.types.AllocatableAction) PriorityQueue(java.util.PriorityQueue) LinkedList(java.util.LinkedList)

Example 10 with WorkerResourceDescription

use of es.bsc.compss.types.resources.WorkerResourceDescription in project compss by bsc-wdc.

the class StopWorkerAction method doAction.

/**
 * Stops the worker on a dedicated thread: retrieves its data, requests the stop with a {@code ShutdownListener}
 * and waits on a semaphore handed to that listener. Exactly one outcome is reported: {@code notifyError()} when
 * the wait raises an exception, {@code notifyCompleted()} otherwise.
 */
@Override
protected void doAction() {
    (new Thread() {

        @SuppressWarnings("unchecked")
        @Override
        public void run() {
            Worker<WorkerResourceDescription> wResource = (Worker<WorkerResourceDescription>) worker.getResource();
            Thread.currentThread().setName(wResource.getName() + " stopper");
            wResource.retrieveData(true);
            // The semaphore starts with no permits, so acquire() blocks until a permit is released;
            // presumably the listener releases it once the shutdown has finished -- confirm in ShutdownListener
            Semaphore sem = new Semaphore(0);
            ShutdownListener sl = new ShutdownListener(sem);
            wResource.stop(sl);
            sl.enable();
            try {
                sem.acquire();
            } catch (Exception e) {
                LOGGER.error("ERROR: Exception raised on worker shutdown", e);
                ErrorManager.warn("Exception stopping worker. Check runtime.log for more details", e);
                notifyError();
                // The failure has been reported; do not also report the action as completed
                return;
            }
            notifyCompleted();
        }
    }).start();
}
Also used : ShutdownListener(es.bsc.compss.types.resources.ShutdownListener) WorkerResourceDescription(es.bsc.compss.types.resources.WorkerResourceDescription) CloudMethodWorker(es.bsc.compss.types.resources.CloudMethodWorker) Worker(es.bsc.compss.types.resources.Worker) Semaphore(java.util.concurrent.Semaphore) BlockedActionException(es.bsc.compss.scheduler.exceptions.BlockedActionException) UnassignedActionException(es.bsc.compss.scheduler.exceptions.UnassignedActionException) FailedActionException(es.bsc.compss.scheduler.exceptions.FailedActionException)

Aggregations

WorkerResourceDescription (es.bsc.compss.types.resources.WorkerResourceDescription)10 LinkedList (java.util.LinkedList)6 BlockedActionException (es.bsc.compss.scheduler.exceptions.BlockedActionException)5 AllocatableAction (es.bsc.compss.scheduler.types.AllocatableAction)5 UnassignedActionException (es.bsc.compss.scheduler.exceptions.UnassignedActionException)4 FailedActionException (es.bsc.compss.scheduler.exceptions.FailedActionException)3 CloudMethodWorker (es.bsc.compss.types.resources.CloudMethodWorker)3 Worker (es.bsc.compss.types.resources.Worker)3 List (java.util.List)3 ResourceScheduler (es.bsc.compss.components.impl.ResourceScheduler)2 ActionNotFoundException (es.bsc.compss.scheduler.exceptions.ActionNotFoundException)2 Gap (es.bsc.compss.scheduler.multiobjective.types.Gap)2 Implementation (es.bsc.compss.types.implementations.Implementation)2 PriorityQueue (java.util.PriorityQueue)2 Action (commons.Action)1 InvalidSchedulingException (es.bsc.compss.scheduler.exceptions.InvalidSchedulingException)1 LocalOptimizationState (es.bsc.compss.scheduler.multiobjective.types.LocalOptimizationState)1 MOProfile (es.bsc.compss.scheduler.multiobjective.types.MOProfile)1 OptimizationAction (es.bsc.compss.scheduler.multiobjective.types.OptimizationAction)1 SchedulingEvent (es.bsc.compss.scheduler.multiobjective.types.SchedulingEvent)1