Use of org.objectweb.proactive.utils.NamedThreadFactory in project scheduling by ow2-proactive.
The class NodesRecoveryManager, method recoverNodes.
/**
 * Recovers the nodes that the database holds for the given node source.
 * <p>
 * Each node row found in the database is submitted to {@code recoverNode}
 * on a bounded thread pool. The results are then consumed in submission
 * order: nodes accepted by {@code isEligible} are collected, and a
 * {@code NODE_ADDED} event is registered and emitted for every non-null
 * node. Once all results are consumed, the eligible nodes are handed to the
 * RM core and a recovery summary is logged.
 * <p>
 * If waiting for any recovery task fails, the error is logged and the
 * method returns immediately: the eligible nodes gathered so far are not
 * handed to the RM core and no summary is logged. The thread pool is shut
 * down on every exit path.
 *
 * @param nodeSource the node source whose nodes are recovered
 */
public void recoverNodes(NodeSource nodeSource) {
    // this log line is important for performance tests
    logger.info(START_TO_RECOVER_NODES);
    String nodeSourceName = nodeSource.getName();
    Collection<RMNodeData> nodesData = this.rmCore.getDbManager().getNodesByNodeSource(nodeSourceName);
    logger.info("Number of nodes found in database for node source " + nodeSourceName + ": " + nodesData.size());
    List<RMNode> recoveredEligibleNodes = Collections.synchronizedList(new ArrayList<>());
    // The counter is handed to every recovery task and those tasks run on
    // several pool threads, so it must tolerate concurrent access. A
    // synchronized wrapper (rather than ConcurrentHashMap) keeps the
    // null-handling of the original HashMap.
    // NOTE(review): if recoverNode updates the counter with a separate
    // get-then-put, it should switch to merge() or lock on this map.
    Map<NodeState, Integer> recoveredNodeStatesCounter = Collections.synchronizedMap(new HashMap<>());
    // for each node found in database, try to lookup node or recover it
    // as down node
    ExecutorService nodeRecoveryThreadPool = PAExecutors.newCachedBoundedThreadPool(1,
                                                                                   PAResourceManagerProperties.RM_NODESOURCE_MAX_THREAD_NUMBER.getValueAsInt(),
                                                                                   120L,
                                                                                   TimeUnit.SECONDS,
                                                                                   new NamedThreadFactory("NodeRecoveryThreadPool"));
    try {
        List<Future<RMNode>> nodesFutures = new ArrayList<>(nodesData.size());
        for (RMNodeData rmNodeData : nodesData) {
            nodesFutures.add(nodeRecoveryThreadPool.submit(() -> this.recoverNode(rmNodeData,
                                                                                  nodeSource,
                                                                                  recoveredNodeStatesCounter)));
        }
        for (Future<RMNode> rmNodeFuture : nodesFutures) {
            RMNode node;
            try {
                node = rmNodeFuture.get();
            } catch (Exception e) {
                logger.error("Unexpected error occurred while recovering node source " + nodeSource.getName(), e);
                if (e instanceof InterruptedException) {
                    // do not swallow the interruption: let callers observe it
                    Thread.currentThread().interrupt();
                }
                // the finally block below stops the remaining recovery tasks
                return;
            }
            if (this.isEligible(node)) {
                recoveredEligibleNodes.add(node);
            }
            if (node != null) {
                final RMNodeEvent event = node.createNodeEvent(RMEventType.NODE_ADDED,
                                                               null,
                                                               node.getProvider().getName());
                this.rmCore.registerAndEmitNodeEvent(event);
            }
        }
    } finally {
        // release the pool threads on every exit path, including unexpected
        // runtime exceptions raised while submitting tasks or emitting events
        nodeRecoveryThreadPool.shutdownNow();
    }
    this.rmCore.addEligibleNodesToRecover(recoveredEligibleNodes);
    this.logNodeRecoverySummary(nodeSourceName, recoveredNodeStatesCounter, recoveredEligibleNodes.size());
}
Use of org.objectweb.proactive.utils.NamedThreadFactory in project scheduling by ow2-proactive.
The class SchedulerStateRecoverHelper, method recover.
/**
 * Rebuilds the scheduler state from the jobs stored in the database.
 * <p>
 * Steps, in order:
 * <ol>
 * <li>load the not-finished jobs and pass each of them to {@code recoverJob}
 * together with the pending/running collections and a bounded thread
 * pool;</li>
 * <li>shut the pool down and wait for it to terminate, for at most the
 * configured number of minutes;</li>
 * <li>apply job updates to the not-finished jobs;</li>
 * <li>for every running job, replay {@code recoverTask} on a sorted copy of
 * its tasks, and turn a {@code PAUSED} job into a paused {@code STALLED}
 * job whose running tasks are counted as pending;</li>
 * <li>a running job whose replay fails is removed from the running jobs,
 * marked {@code CANCELED}, persisted and reported as finished;</li>
 * <li>append the finished jobs loaded from the database.</li>
 * </ol>
 *
 * @param loadJobPeriod period forwarded to {@code loadFinishedJobs}
 * @param rmProxy proxy forwarded to {@code recoverJob}
 * @param schedulerStatus status stored in the returned state
 * @return the recovered pending, running and finished jobs with the given status
 * @throws SchedulerStateNotRecoveredException if the thread is interrupted
 *             while waiting for the recovery pool to terminate
 */
public RecoveredSchedulerState recover(long loadJobPeriod, RMProxy rmProxy, SchedulerStatus schedulerStatus) {
    dbManager.setTaskDataOwnerIfNull();
    List<InternalJob> notFinishedJobs = dbManager.loadNotFinishedJobs(true);
    Vector<InternalJob> pendingJobs = new Vector<>();
    Vector<InternalJob> runningJobs = new Vector<>();
    ExecutorService recoverRunningTasksThreadPool = PAExecutors.newCachedBoundedThreadPool(1,
                                                                                          PASchedulerProperties.SCHEDULER_PARALLEL_SCHEDULER_STATE_RECOVER_NBTHREAD.getValueAsInt(),
                                                                                          60L,
                                                                                          TimeUnit.SECONDS,
                                                                                          new NamedThreadFactory("TaskRecoverThreadPool"));
    try {
        for (InternalJob job : notFinishedJobs) {
            recoverJob(rmProxy, pendingJobs, runningJobs, job, recoverRunningTasksThreadPool);
        }
    } finally {
        // Always stop accepting work, even when recoverJob throws, so that the
        // pool threads are released instead of leaking with the failed recovery.
        recoverRunningTasksThreadPool.shutdown();
    }
    boolean terminatedWithoutTimeout;
    try {
        terminatedWithoutTimeout = recoverRunningTasksThreadPool.awaitTermination(PASchedulerProperties.SCHEDULER_PARALLEL_SCHEDULER_STATE_RECOVER_TIMEOUT.getValueAsInt(),
                                                                                  TimeUnit.MINUTES);
    } catch (InterruptedException e) {
        logger.error("Interrupted while waiting for the Scheduler state to be recovered", e);
        Thread.currentThread().interrupt();
        throw new SchedulerStateNotRecoveredException(e);
    }
    // NOTE(review): presumably aborts the recovery when the wait timed out -- confirm in the helper
    failIfSchedulerStateRecoveryTimeout(terminatedWithoutTimeout);
    applyJobUpdates(notFinishedJobs);
    Vector<InternalJob> finishedJobs = new Vector<>();
    for (Iterator<InternalJob> iterator = runningJobs.iterator(); iterator.hasNext();) {
        InternalJob job = iterator.next();
        try {
            List<InternalTask> tasksList = copyAndSort(job.getITasks());
            // simulate the running execution to recreate the tree.
            for (InternalTask task : tasksList) {
                job.recoverTask(task.getId());
            }
            if (job.getStatus() == JobStatus.PAUSED) {
                job.setStatus(JobStatus.STALLED);
                job.setPaused();
                // update the count of pending and running task.
                job.setNumberOfPendingTasks(job.getNumberOfPendingTasks() + job.getNumberOfRunningTasks());
                job.setNumberOfRunningTasks(0);
            }
        } catch (Throwable e) {
            logger.error("Failed to recover job " + job.getId() + " " + job.getName() + " job might be in a inconsistent state", e);
            jobLogger.error(job.getId(), "Failed to recover job, job might be in an inconsistent state", e);
            // partially cancel job (not tasks) and move it to finished jobs to avoid running it
            iterator.remove();
            job.setStatus(JobStatus.CANCELED);
            finishedJobs.add(job);
            dbManager.updateJobAndTasksState(job);
        }
    }
    finishedJobs.addAll(dbManager.loadFinishedJobs(false, loadJobPeriod));
    logger.info("[Recovering counters] " + " Pending: " + pendingJobs.size() + " Running: " + runningJobs.size() + " Finished: " + finishedJobs.size());
    return new RecoveredSchedulerState(pendingJobs, runningJobs, finishedJobs, schedulerStatus);
}
Use of org.objectweb.proactive.utils.NamedThreadFactory in project scheduling by ow2-proactive.
The class SchedulerFrontend, method initActivity.
/**
 * Runs the scheduler front-end start-up sequence.
 * <p>
 * In order, this method: initialises the clients policy, creates the
 * authentication active object, starts the data space naming service,
 * creates the client, internal, task-pinger and scheduled thread pools,
 * waits for and joins the resource manager, reads the optional job loading
 * period, boots JMX, starts the synchronization service, recovers the
 * scheduler state, builds the scheduling infrastructure and service,
 * registers and activates the authentication interface and, when a
 * monitoring frequency is configured, starts the memory monitoring
 * scheduler.
 * <p>
 * Any exception raised along the way is logged as fatal, its stack trace is
 * printed and the JVM exits with status 1.
 *
 * @param body not referenced by this method
 * @see org.objectweb.proactive.InitActive#initActivity(org.objectweb.proactive.Body)
 */
@Override
public void initActivity(Body body) {
    try {
        // security policy first
        logger.debug("Setting up scheduler security policy");
        ClientsPolicy.init();

        // authentication interface: start-up does not go on if this fails
        logger.debug("Creating scheduler authentication interface...");
        Object[] authenticationArgs = new Object[] { PAActiveObject.getStubOnThis() };
        authentication = PAActiveObject.newActive(SchedulerAuthentication.class, authenticationArgs);

        // data spaces naming service
        DataSpaceServiceStarter dataSpaceStarter = DataSpaceServiceStarter.getDataSpaceServiceStarter();
        dataSpaceStarter.startNamingService();

        // thread pools handed to the scheduling infrastructure further down
        int clientThreads = PASchedulerProperties.SCHEDULER_CLIENT_POOL_NBTHREAD.getValueAsInt();
        ExecutorService clientPool = PAExecutors.newCachedBoundedThreadPool(1,
                                                                            clientThreads,
                                                                            120L,
                                                                            TimeUnit.SECONDS,
                                                                            new NamedThreadFactory("ClientRequestsThreadPool",
                                                                                                   false,
                                                                                                   3));
        int internalThreads = PASchedulerProperties.SCHEDULER_INTERNAL_POOL_NBTHREAD.getValueAsInt();
        ExecutorService internalPool = PAExecutors.newCachedBoundedThreadPool(1,
                                                                              internalThreads,
                                                                              120L,
                                                                              TimeUnit.SECONDS,
                                                                              new NamedThreadFactory("InternalOperationsThreadPool",
                                                                                                     false,
                                                                                                     7));
        int pingerThreads = PASchedulerProperties.SCHEDULER_TASK_PINGER_POOL_NBTHREAD.getValueAsInt();
        ExecutorService pingerPool = PAExecutors.newCachedBoundedThreadPool(1,
                                                                            pingerThreads,
                                                                            120L,
                                                                            TimeUnit.SECONDS,
                                                                            new NamedThreadFactory("TaskPingerThreadPool",
                                                                                                   false,
                                                                                                   2));
        int timerThreads = PASchedulerProperties.SCHEDULER_SCHEDULED_POOL_NBTHREAD.getValueAsInt();
        ScheduledExecutorService timerPool = new ScheduledThreadPoolExecutor(timerThreads,
                                                                             new NamedThreadFactory("SchedulingServiceTimerThread",
                                                                                                    false,
                                                                                                    2));

        // the resource manager must be reachable before going any further
        RMConnection.waitAndJoin(rmURL.toString());
        RMProxiesManager proxiesManager = RMProxiesManager.createRMProxiesManager(rmURL);
        RMProxy proxy = proxiesManager.getRmProxy();

        // optional job loading period; stays at -1 when unset, blank or invalid
        long jobLoadingPeriod = -1;
        if (PASchedulerProperties.SCHEDULER_DB_LOAD_JOB_PERIOD.isSet()) {
            String rawPeriod = PASchedulerProperties.SCHEDULER_DB_LOAD_JOB_PERIOD.getValueAsString();
            boolean periodProvided = rawPeriod != null && !rawPeriod.isEmpty();
            if (periodProvided) {
                try {
                    jobLoadingPeriod = Tools.parsePeriod(rawPeriod);
                } catch (IllegalArgumentException e) {
                    logger.warn("Invalid load job period string: " + rawPeriod + ", this setting is ignored", e);
                }
            }
        }

        logger.debug("Booting jmx...");
        this.jmxHelper.boot(authentication);

        publicStore = startSynchronizationService();

        // rebuild the scheduler state from the database
        SchedulerStateRecoverHelper recoverHelper = new SchedulerStateRecoverHelper(dbManager);
        RecoveredSchedulerState recovered = recoverHelper.recover(jobLoadingPeriod, proxy, initialStatus);
        this.frontendState = new SchedulerFrontendState(recovered.getSchedulerState(), jmxHelper, dbManager);

        SchedulingInfrastructure schedulingInfrastructure = new SchedulingInfrastructureImpl(dbManager,
                                                                                             proxiesManager,
                                                                                             dataSpaceStarter,
                                                                                             clientPool,
                                                                                             internalPool,
                                                                                             pingerPool,
                                                                                             timerPool);
        this.spacesSupport = schedulingInfrastructure.getSpacesSupport();
        ServerJobAndTaskLogs.getInstance().setSpacesSupport(this.spacesSupport);

        String publicKeyRelativePath = PASchedulerProperties.SCHEDULER_AUTH_PUBKEY_PATH.getValueAsString();
        String publicKeyPath = PASchedulerProperties.getAbsolutePath(publicKeyRelativePath);
        this.corePublicKey = Credentials.getPublicKey(publicKeyPath);

        this.schedulingService = new SchedulingService(schedulingInfrastructure,
                                                       frontendState,
                                                       recovered,
                                                       policyFullName,
                                                       null,
                                                       publicStore);
        recovered.enableLiveLogsForRunningTasks(schedulingService);
        releaseBusyNodesWithNoRunningTask(proxy, recovered);

        logger.debug("Registering scheduler...");
        PAActiveObject.registerByName(authentication, SchedulerConstants.SCHEDULER_DEFAULT_NAME);
        authentication.setActivated(true);

        Tools.logAvailableScriptEngines(logger);

        if (PASchedulerProperties.SCHEDULER_MEM_MONITORING_FREQ.isSet()) {
            logger.debug("Starting the memory monitoring process...");
            metricsMonitorScheduler = new it.sauronsoftware.cron4j.Scheduler();
            String monitoringCron = PASchedulerProperties.SCHEDULER_MEM_MONITORING_FREQ.getValueAsString();
            // both runners share the same cron expression
            metricsMonitorScheduler.schedule(monitoringCron,
                                             new TableSizeMonitorRunner(dbManager.getTransactionHelper()));
            metricsMonitorScheduler.schedule(monitoringCron,
                                             new JobsMemoryMonitorRunner(dbManager.getSessionFactory()
                                                                                  .getStatistics(),
                                                                         recovered.getSchedulerState()));
            metricsMonitorScheduler.start();
        }
    } catch (Exception e) {
        // a start-up failure is not recoverable: report it and stop the JVM
        logger.fatal("Failed to start Scheduler", e);
        e.printStackTrace();
        System.exit(1);
    }
}
Aggregations