use of com.cloud.managed.context.ManagedContextRunnable in project cosmic by MissionCriticalCloud.
the class ClusterManagerImpl method getNotificationTask.
private Runnable getNotificationTask() {
return new ManagedContextRunnable() {
@Override
protected void runInContext() {
while (true) {
synchronized (_notificationMsgs) {
try {
_notificationMsgs.wait(1000);
} catch (final InterruptedException e) {
}
}
ClusterManagerMessage msg = null;
while ((msg = getNextNotificationMessage()) != null) {
switch(msg.getMessageType()) {
case nodeAdded:
if (msg.getNodes() != null && msg.getNodes().size() > 0) {
final Profiler profiler = new Profiler();
profiler.start();
notifyNodeJoined(msg.getNodes());
profiler.stop();
if (profiler.getDurationInMillis() > 1000) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Notifying management server join event took " + profiler.getDurationInMillis() + " ms");
}
} else {
s_logger.warn("Notifying management server join event took " + profiler.getDurationInMillis() + " ms");
}
}
break;
case nodeRemoved:
if (msg.getNodes() != null && msg.getNodes().size() > 0) {
final Profiler profiler = new Profiler();
profiler.start();
notifyNodeLeft(msg.getNodes());
profiler.stop();
if (profiler.getDurationInMillis() > 1000) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Notifying management server leave event took " + profiler.getDurationInMillis() + " ms");
}
} else {
s_logger.warn("Notifying management server leave event took " + profiler.getDurationInMillis() + " ms");
}
}
break;
case nodeIsolated:
notifyNodeIsolated();
break;
default:
assert false;
break;
}
}
try {
Thread.sleep(1000);
} catch (final InterruptedException e) {
s_logger.warn("Caught (previously ignored) interrupted exception", e);
}
}
}
};
}
use of com.cloud.managed.context.ManagedContextRunnable in project cosmic by MissionCriticalCloud.
the class ClusterManagerImpl method onNotifyingClusterPdu.
private void onNotifyingClusterPdu() {
while (true) {
final ClusterServicePdu pdu = popIncomingClusterPdu(1000);
if (pdu == null) {
continue;
}
_executor.execute(new ManagedContextRunnable() {
@Override
protected void runInContext() {
if (pdu.getPduType() == ClusterServicePdu.PDU_TYPE_RESPONSE) {
final ClusterServiceRequestPdu requestPdu = popRequestPdu(pdu.getAckSequenceId());
if (requestPdu != null) {
requestPdu.setResponseResult(pdu.getJsonPackage());
synchronized (requestPdu) {
requestPdu.notifyAll();
}
} else {
s_logger.warn("Original request has already been cancelled. pdu: " + pdu.getJsonPackage());
}
} else {
String result = _dispatcher.dispatch(pdu);
if (result == null) {
result = "";
}
if (pdu.getPduType() == ClusterServicePdu.PDU_TYPE_REQUEST) {
final ClusterServicePdu responsePdu = new ClusterServicePdu();
responsePdu.setPduType(ClusterServicePdu.PDU_TYPE_RESPONSE);
responsePdu.setSourcePeer(pdu.getDestPeer());
responsePdu.setDestPeer(pdu.getSourcePeer());
responsePdu.setAckSequenceId(pdu.getSequenceId());
responsePdu.setJsonPackage(result);
addOutgoingClusterPdu(responsePdu);
}
}
}
});
}
}
use of com.cloud.managed.context.ManagedContextRunnable in project cosmic by MissionCriticalCloud.
the class AsyncJobManagerImpl method getHeartbeatTask.
private Runnable getHeartbeatTask() {
return new ManagedContextRunnable() {
@Override
protected void runInContext() {
final GlobalLock scanLock = GlobalLock.getInternLock("AsyncJobManagerHeartbeat");
try {
if (scanLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION)) {
try {
reallyRun();
} finally {
scanLock.unlock();
}
}
} finally {
scanLock.releaseRef();
}
}
protected void reallyRun() {
try {
final List<SyncQueueItemVO> l = _queueMgr.dequeueFromAny(getMsid(), MAX_ONETIME_SCHEDULE_SIZE);
if (l != null && l.size() > 0) {
for (final SyncQueueItemVO item : l) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Execute sync-queue item: " + item.toString());
}
executeQueueItem(item, false);
}
}
final List<Long> standaloneWakeupJobs = wakeupScan();
for (final Long jobId : standaloneWakeupJobs) {
// TODO, we assume that all jobs in this category is API job only
final AsyncJobVO job = _jobDao.findById(jobId);
if (job != null && (job.getPendingSignals() & AsyncJob.Constants.SIGNAL_MASK_WAKEUP) != 0) {
scheduleExecution(job, false);
}
}
} catch (final Throwable e) {
s_logger.error("Unexpected exception when trying to execute queue item, ", e);
}
}
};
}
use of com.cloud.managed.context.ManagedContextRunnable in project cosmic by MissionCriticalCloud.
the class ClusteredAgentManagerImpl method getTransferScanTask.
private Runnable getTransferScanTask() {
return new ManagedContextRunnable() {
@Override
protected void runInContext() {
if (s_logger.isTraceEnabled()) {
s_logger.trace("Clustered agent transfer scan check, management server id:" + _nodeId);
}
synchronized (_agentToTransferIds) {
if (_agentToTransferIds.size() > 0) {
s_logger.debug("Found " + _agentToTransferIds.size() + " agents to transfer");
// for (Long hostId : _agentToTransferIds) {
for (final Iterator<Long> iterator = _agentToTransferIds.iterator(); iterator.hasNext(); ) {
final Long hostId = iterator.next();
final AgentAttache attache = findAttache(hostId);
// if the thread:
// 1) timed out waiting for the host to reconnect
// 2) recipient management server is not active any more
// 3) if the management server doesn't own the host any more
// remove the host from re-balance list and delete from op_host_transfer DB
// no need to do anything with the real attache as we haven't modified it yet
final Date cutTime = DateUtil.currentGMTTime();
final HostTransferMapVO transferMap = _hostTransferDao.findActiveHostTransferMapByHostId(hostId, new Date(cutTime.getTime() - rebalanceTimeOut));
if (transferMap == null) {
s_logger.debug("Timed out waiting for the host id=" + hostId + " to be ready to transfer, skipping rebalance for the host");
iterator.remove();
_hostTransferDao.completeAgentTransfer(hostId);
continue;
}
if (transferMap.getInitialOwner() != _nodeId || attache == null || attache.forForward()) {
s_logger.debug("Management server " + _nodeId + " doesn't own host id=" + hostId + " any more, skipping rebalance for the host");
iterator.remove();
_hostTransferDao.completeAgentTransfer(hostId);
continue;
}
final ManagementServerHostVO futureOwner = _mshostDao.findByMsid(transferMap.getFutureOwner());
final ManagementServerHostVO initialOwner = _mshostDao.findByMsid(transferMap.getInitialOwner());
if (futureOwner != null && futureOwner.getState() != ManagementServerHost.State.Up) {
s_logger.debug("Can't transfer host " + hostId + " as it's future owner is not in UP state: " + futureOwner + ", skipping rebalance");
iterator.remove();
_hostTransferDao.completeAgentTransfer(hostId);
continue;
}
if (attache.getQueueSize() == 0 && attache.getNonRecurringListenersSize() == 0) {
iterator.remove();
try {
s_logger.debug("Transferring agent for host " + hostId + " from management server " + initialOwner + " " + "to " + futureOwner);
_executor.execute(new RebalanceTask(hostId, transferMap.getInitialOwner(), transferMap.getFutureOwner()));
} catch (final RejectedExecutionException ex) {
s_logger.warn("Failed to submit rebalance task for host id=" + hostId + "; postponing the execution");
continue;
}
} else {
s_logger.debug("Agent " + hostId + " can't be transferred yet as its request queue size is " + attache.getQueueSize() + " and listener queue size is " + attache.getNonRecurringListenersSize());
}
}
} else {
if (s_logger.isTraceEnabled()) {
s_logger.trace("Found no agents to be transferee by the management server " + _nodeId);
}
}
}
}
};
}
use of com.cloud.managed.context.ManagedContextRunnable in project cosmic by MissionCriticalCloud.
the class AsyncJobManagerImpl method getExecutorRunnable.
private Runnable getExecutorRunnable(final AsyncJob job) {
return new ManagedContextRunnable() {
@Override
public void run() {
// register place-holder context to avoid installing system account call context
if (CallContext.current() == null) {
CallContext.registerPlaceHolderContext();
}
final String related = job.getRelated();
String logContext = job.getShortUuid();
if (related != null && !related.isEmpty()) {
MDC.put("job", " (job: " + related + "/" + "job: " + job.getId() + ")");
final AsyncJob relatedJob = _jobDao.findByIdIncludingRemoved(Long.parseLong(related));
if (relatedJob != null) {
logContext = relatedJob.getShortUuid();
}
} else {
MDC.put("job", " (job: " + job.getId() + ")");
}
MDC.put("logcontextid", " (logid: " + logContext + ")");
try {
super.run();
} finally {
MDC.remove("job");
}
}
@Override
protected void runInContext() {
final long runNumber = getJobRunNumber();
try {
//
try {
JmxUtil.registerMBean("AsyncJobManager", "Active Job " + job.getId(), new AsyncJobMBeanImpl(job));
} catch (final Exception e) {
// is expected to fail under situations
if (s_logger.isTraceEnabled()) {
s_logger.trace("Unable to register active job " + job.getId() + " to JMX monitoring due to exception " + ExceptionUtil.toString(e));
}
}
_jobMonitor.registerActiveTask(runNumber, job.getId());
AsyncJobExecutionContext.setCurrentExecutionContext(new AsyncJobExecutionContext(job));
final String related = job.getRelated();
String logContext = job.getShortUuid();
if (related != null && !related.isEmpty()) {
final AsyncJob relatedJob = _jobDao.findByIdIncludingRemoved(Long.parseLong(related));
if (relatedJob != null) {
logContext = relatedJob.getShortUuid();
}
}
MDC.put("logcontextid", " (logid: " + logContext + ")");
// execute the job
if (s_logger.isDebugEnabled()) {
s_logger.debug("Executing " + StringUtils.cleanString(job.toString()));
}
if ((getAndResetPendingSignals(job) & AsyncJob.Constants.SIGNAL_MASK_WAKEUP) != 0) {
final AsyncJobDispatcher jobDispatcher = getWakeupDispatcher(job);
if (jobDispatcher != null) {
jobDispatcher.runJob(job);
} else {
// TODO, job wakeup is not in use yet
if (s_logger.isTraceEnabled()) {
s_logger.trace("Unable to find a wakeup dispatcher from the joined job: " + job);
}
}
} else {
final AsyncJobDispatcher jobDispatcher = getDispatcher(job.getDispatcher());
if (jobDispatcher != null) {
jobDispatcher.runJob(job);
} else {
s_logger.error("Unable to find job dispatcher, job will be cancelled");
final ExceptionResponse response = new ExceptionResponse();
response.setErrorCode(ApiErrorCode.INTERNAL_ERROR.getHttpCode());
response.setErrorText("Unable to find job dispatcher, job will be cancelled");
completeAsyncJob(job.getId(), JobInfo.Status.FAILED, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), JobSerializerHelper.toSerializedString(response));
}
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Done executing " + job.getCmd() + " for job-" + job.getId());
}
} catch (final Throwable e) {
s_logger.error("Unexpected exception", e);
final ExceptionResponse response = new ExceptionResponse();
response.setErrorCode(ApiErrorCode.INTERNAL_ERROR.getHttpCode());
response.setErrorText(ExceptionUtils.getRootCauseMessage(e));
completeAsyncJob(job.getId(), JobInfo.Status.FAILED, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), JobSerializerHelper.toSerializedString(response));
} finally {
// guard final clause as well
try {
if (job.getSyncSource() != null) {
// here check queue item one more time to double make sure that queue item is removed in case of any uncaught exception
_queueMgr.purgeItem(job.getSyncSource().getId());
}
try {
JmxUtil.unregisterMBean("AsyncJobManager", "Active Job " + job.getId());
} catch (final Exception e) {
// is expected to fail under situations
if (s_logger.isTraceEnabled()) {
s_logger.trace("Unable to unregister job " + job.getId() + " to JMX monitoring due to exception " + ExceptionUtil.toString(e));
}
}
//
// clean execution environment
//
AsyncJobExecutionContext.unregister();
_jobMonitor.unregisterActiveTask(runNumber);
} catch (final Throwable e) {
s_logger.error("Double exception", e);
}
}
}
};
}
Aggregations