use of es.bsc.compss.types.resources.WorkerResourceDescription in project compss by bsc-wdc.
the class TaskScheduler method errorOnAction.
/**
 * Handles an error reported for the given action. The action processes the error itself: if it can still be
 * retried it is scored and scheduled again; if it has failed for good, it and the actions returned by
 * {@code action.failed()} are unscheduled from the resource the action was assigned to. Only reached on
 * re-schedule (not on resubmit).
 *
 * @param action
 *            action that raised the error
 */
@SuppressWarnings("unchecked")
public final void errorOnAction(AllocatableAction action) {
    LOGGER.warn("[TaskScheduler] Error on action " + action);

    // Keep a handle on the resource now: processing the error removes the assignment from the action
    ResourceScheduler<WorkerResourceDescription> assignedResource = (ResourceScheduler<WorkerResourceDescription>) action.getAssignedResource();
    List<AllocatableAction> releasedActions = new LinkedList<>();
    boolean canRetry = true;

    try {
        action.error();
    } catch (FailedActionException fae) {
        // No retries left: the action is definitively failed
        canRetry = false;
        LOGGER.warn("[TaskScheduler] Action completely failed " + action);
        removeFromReady(action);

        // Release every action that failed as a consequence
        for (AllocatableAction dependentFailure : action.failed()) {
            try {
                releasedActions.addAll(assignedResource.unscheduleAction(dependentFailure));
            } catch (ActionNotFoundException anfe) {
                // The resource does not know this action, so there is nothing to unschedule
            }
        }
    }

    // Release the erroneous action itself and collect whatever the resource frees in exchange
    try {
        releasedActions.addAll(assignedResource.unscheduleAction(action));
    } catch (ActionNotFoundException anfe) {
        // The resource does not know this action, so there is nothing to unschedule
    }
    workerLoadUpdate(assignedResource);

    if (canRetry) {
        // Give the action another chance on whichever resource scores best
        Score retryScore = generateActionScore(action);
        try {
            scheduleAction(action, retryScore);
            tryToLaunch(action);
        } catch (BlockedActionException bae) {
            removeFromReady(action);
            addToBlocked(action);
        }
    }

    // No data-free actions here; only the resource-free ones need to be handled
    List<AllocatableAction> stillBlocked = new LinkedList<>();
    handleDependencyFreeActions(new LinkedList<>(), releasedActions, stillBlocked, assignedResource);
    for (AllocatableAction blocked : stillBlocked) {
        removeFromReady(blocked);
        addToBlocked(blocked);
    }
}
use of es.bsc.compss.types.resources.WorkerResourceDescription in project compss by bsc-wdc.
the class TaskScheduler method reducedWorkerResources.
/**
 * Reacts to a reduction of the resources of a cloud worker.
 *
 * When the type composition of the worker description is empty, the reduction is forwarded to
 * {@code ResourceManager.terminateCloudResource}. Otherwise the worker is removed from {@code workers}, the
 * profiles of its executable implementations are accumulated into {@code offVMsProfiles}, the removal is notified
 * through {@code workerRemoved} and a {@code StopWorkerAction} is scheduled and launched on the worker.
 *
 * NOTE(review): stopping the worker when its composition is NOT empty reads as possibly inverted; confirm the
 * intended condition against the callers before relying on it.
 *
 * @param worker
 *            scheduler of the worker whose resources were reduced
 * @param modification
 *            resource update describing the reduction
 */
@SuppressWarnings("unchecked")
private <T extends WorkerResourceDescription> void reducedWorkerResources(ResourceScheduler<T> worker, ResourceUpdate<T> modification) {
    CloudMethodWorker cloudResource = (CloudMethodWorker) worker.getResource();

    if (cloudResource.getDescription().getTypeComposition().isEmpty()) {
        ResourceManager.terminateCloudResource(cloudResource, (CloudMethodResourceDescription) modification.getModification());
        return;
    }

    // Single unchecked view of the worker, reused wherever the non-generic signature is required
    ResourceScheduler<WorkerResourceDescription> untypedWorker = (ResourceScheduler<WorkerResourceDescription>) worker;

    synchronized (workers) {
        workers.remove(untypedWorker.getResource());

        // Preserve the profiling data gathered by the worker before it disappears
        int totalCores = CoreManager.getCoreCount();
        List<Implementation>[] implsPerCore = worker.getExecutableImpls();
        for (int coreId = 0; coreId < totalCores; coreId++) {
            for (Implementation impl : implsPerCore[coreId]) {
                Profile implProfile = worker.getProfile(impl);
                if (implProfile == null) {
                    continue;
                }
                offVMsProfiles[coreId][impl.getImplementationId()].accumulate(implProfile);
            }
        }
    }

    this.workerRemoved(untypedWorker);

    StopWorkerAction stopAction = new StopWorkerAction(generateSchedulingInformation(worker), worker, this, modification);
    try {
        stopAction.schedule(untypedWorker, (Score) null);
        stopAction.tryToLaunch();
    } catch (BlockedActionException | UnassignedActionException | InvalidSchedulingException e) {
        // Can not be blocked nor unassigned
    }
}
use of es.bsc.compss.types.resources.WorkerResourceDescription in project compss by bsc-wdc.
the class TaskScheduler method actionCompleted.
/**
 * Marks the given action as finished: it is unscheduled from its resource, the actions returned by
 * {@code action.completed()} are registered as ready, the worker load is refreshed and every action freed by
 * the completion is handed over for scheduling.
 *
 * @param action
 *            action that has finished
 */
@SuppressWarnings("unchecked")
public final void actionCompleted(AllocatableAction action) {
    LOGGER.info("[TaskScheduler] Action completed " + action);

    // The action is no longer pending
    removeFromReady(action);

    ResourceScheduler<WorkerResourceDescription> hostResource = (ResourceScheduler<WorkerResourceDescription>) action.getAssignedResource();
    List<AllocatableAction> releasedByResource;
    try {
        releasedByResource = hostResource.unscheduleAction(action);
    } catch (ActionNotFoundException ex) {
        // The resource does not know this action, so it frees nothing
        releasedByResource = new LinkedList<>();
    }

    // Actions whose data dependencies are now satisfied become ready
    List<AllocatableAction> releasedByData = action.completed();
    for (AllocatableAction readyAction : releasedByData) {
        addToReady(readyAction);
    }

    // Refresh the load of the worker that ran the action
    workerLoadUpdate(hostResource);

    // Try to place the freed actions; the ones that cannot be placed come back as blocked candidates
    List<AllocatableAction> stillBlocked = new LinkedList<>();
    handleDependencyFreeActions(releasedByData, releasedByResource, stillBlocked, hostResource);
    for (AllocatableAction blocked : stillBlocked) {
        removeFromReady(blocked);
        addToBlocked(blocked);
    }
}
use of es.bsc.compss.types.resources.WorkerResourceDescription in project compss by bsc-wdc.
the class MOResourceScheduler method localOptimization.
/*--------------------------------------------------
---------------------------------------------------
-------------- Optimization Methods ---------------
---------------------------------------------------
--------------------------------------------------*/
/**
* Runs one local optimization pass over the actions handled by this resource scheduler.
*
* For the duration of the pass a fresh OptimizationAction is stored in opAction. Current actions are scanned,
* pending schedulings/unschedulings are classified, tasks are rescheduled, and finally the gap list is replaced
* by the recomputed gaps and every successor registered on the optimization action is scheduled using them.
* opAction is reset to null before returning.
*
* @param updateId
* identifier forwarded to the LocalOptimizationState
* @param selectionComparator
* comparator forwarded to the LocalOptimizationState
* @param donorComparator
* comparator that orders the returned priority queue
* @return priority queue ordered by donorComparator; it is passed to rescheduleTasks and additionally receives
* every successor of the optimization action
*/
@SuppressWarnings("unchecked")
public PriorityQueue<AllocatableAction> localOptimization(long updateId, Comparator<AllocatableAction> selectionComparator, Comparator<AllocatableAction> donorComparator) {
// System.out.println("Local Optimization for " + this.getName() + " starts");
// Working state shared by the scan/classify/reschedule steps below
LocalOptimizationState state = new LocalOptimizationState(updateId, (MOResourceScheduler<WorkerResourceDescription>) this, getReadyComparator(), selectionComparator);
// Result queue; initial capacity 1, ordered by the donor comparator
PriorityQueue<AllocatableAction> actions = new PriorityQueue<AllocatableAction>(1, donorComparator);
// Publish the optimization action under the gaps monitor
synchronized (gaps) {
opAction = new OptimizationAction();
}
// No changes in the Gap structure
// Scan actions: Filters ready and selectable actions
LOGGER.debug(LOG_PREFIX + "Scanning current actions");
// The returned actions are unlocked again once rescheduling is done (see loop below)
List<AllocatableAction> lockedActions = scanActions(state);
// Gets all the pending schedulings
// The successor list of the optimization action is swapped for a new empty list while holding the gaps monitor
List<AllocatableAction> newPendingSchedulings = new LinkedList<>();
List<AllocatableAction> pendingSchedulings;
synchronized (gaps) {
MOSchedulingInformation opDSI = (MOSchedulingInformation) opAction.getSchedulingInfo();
pendingSchedulings = opDSI.replaceSuccessors(newPendingSchedulings);
}
// Classify pending actions: Filters ready and selectable actions
LOGGER.debug(LOG_PREFIX + "Classify Pending Scheduling/Unscheduling actions");
classifyPendingSchedulings(pendingSchedulings, state);
classifyPendingUnschedulings(state);
// ClassifyActions
LOGGER.debug(LOG_PREFIX + "Reschedule pending actions");
// Rescheduling yields the gap list that replaces the current one further down
List<Gap> newGaps = rescheduleTasks(state, actions);
// Ensuring there are no locked actions after rescheduling
for (AllocatableAction action : lockedActions) {
MOSchedulingInformation actionDSI = (MOSchedulingInformation) action.getSchedulingInfo();
try {
actionDSI.unlock();
} catch (IllegalMonitorStateException e) {
// Raised by unlock() and deliberately tolerated here
LOGGER.debug(LOG_PREFIX + "Illegal Monitor Exception when releasing locked actions. Ignoring...");
}
}
// Disabled debug output, kept for reference
/*
* System.out.println("\t is running: "); for (AllocatableAction aa : state.getRunningActions()) {
* System.out.println("\t\t" + aa + " with implementation " + ((aa.getAssignedImplementation() == null) ? "null"
* : aa .getAssignedImplementation().getImplementationId()) + " started " + ((aa.getStartTime() == null) ? "-" :
* (System .currentTimeMillis() - aa.getStartTime())));
*
* }
*
* System.out.println(this.getName() + " has no resources for: "); for (AllocatableAction aa :
* this.resourceBlockingAction .getDataSuccessors()) { System.out .println("\t" + aa + " with" +
* " implementation " + ((aa.getAssignedImplementation() == null) ? "null" : aa.getAssignedImplementation()
* .getImplementationId())); } System.out .println(this.getName() +
* " will wait for data producers to be rescheduled for actions:"); for (AllocatableAction aa :
* this.dataBlockingAction.getDataSuccessors()) { System.out .println("\t" + aa + " with" + " implementation " +
* ((aa.getAssignedImplementation() == null) ? "null" : aa.getAssignedImplementation() .getImplementationId()));
* }
*/
// Schedules all the pending schedulings and unblocks the scheduling of new actions
LOGGER.debug(LOG_PREFIX + "Manage new gaps");
synchronized (gaps) {
// Replace the gap list contents with the recomputed gaps
gaps.clear();
gaps.addAll(newGaps);
MOSchedulingInformation opDSI = (MOSchedulingInformation) opAction.getSchedulingInfo();
List<AllocatableAction> successors = opDSI.getSuccessors();
for (AllocatableAction action : successors) {
actions.add(action);
MOSchedulingInformation actionDSI = (MOSchedulingInformation) action.getSchedulingInfo();
// NOTE(review): unlock() is not in a finally block, so an exception thrown by scheduleUsingGaps would
// leave the action locked - confirm whether that can happen
actionDSI.lock();
// Detach the action from the optimization action before scheduling it on the new gaps
actionDSI.removePredecessor(opAction);
this.scheduleUsingGaps(action, gaps);
actionDSI.unlock();
}
opDSI.clearSuccessors();
// The optimization pass is over
opAction = null;
}
// System.out.println("Local Optimization for " + this.getName() + " ends");
return actions;
}
use of es.bsc.compss.types.resources.WorkerResourceDescription in project compss by bsc-wdc.
the class StopWorkerAction method doAction.
/**
 * Stops the worker associated with this action from a dedicated thread.
 *
 * The thread retrieves the worker data, requests the worker stop with a {@code ShutdownListener} and waits on a
 * semaphore (presumably released by the listener once the shutdown finishes). On success the action is notified
 * as completed. If the wait is interrupted the action is notified as erroneous only; it is no longer also
 * reported as completed.
 */
@Override
protected void doAction() {
    (new Thread() {
        @SuppressWarnings("unchecked")
        @Override
        public void run() {
            Worker<WorkerResourceDescription> wResource = (Worker<WorkerResourceDescription>) worker.getResource();
            Thread.currentThread().setName(wResource.getName() + " stopper");
            wResource.retrieveData(true);

            // The semaphore starts with no permits, so acquire() blocks until someone releases it
            Semaphore sem = new Semaphore(0);
            ShutdownListener sl = new ShutdownListener(sem);
            wResource.stop(sl);
            sl.enable();

            try {
                sem.acquire();
            } catch (InterruptedException e) {
                LOGGER.error("ERROR: Exception raised on worker shutdown", e);
                ErrorManager.warn("Exception stopping worker. Check runtime.log for more details", e);
                notifyError();
                // Restore the interrupt flag only after the notification so that it cannot disturb it
                Thread.currentThread().interrupt();
                // A failed shutdown must not be reported as completed as well
                return;
            }
            notifyCompleted();
        }
    }).start();
}
Aggregations