Search in sources :

Example 1 with ManagedContextRunnable

use of com.cloud.managed.context.ManagedContextRunnable in project cosmic by MissionCriticalCloud.

the class ClusterManagerImpl method getNotificationTask.

/**
 * Creates the long-running task that delivers queued cluster notification messages.
 *
 * <p>Each cycle waits up to one second on {@code _notificationMsgs}, drains every message returned by
 * {@code getNextNotificationMessage()}, forwards it to the matching join/leave/isolation callback and
 * then sleeps for another second. Callbacks that take longer than one second are reported at WARN
 * level; faster ones are only reported at DEBUG level.
 *
 * <p>The task ends when its thread is interrupted; the interrupt flag is restored before returning so
 * the owner of the thread can observe it.
 *
 * @return the runnable to hand to the notification executor
 */
private Runnable getNotificationTask() {
    return new ManagedContextRunnable() {

        @Override
        protected void runInContext() {
            while (true) {
                synchronized (_notificationMsgs) {
                    try {
                        _notificationMsgs.wait(1000);
                    } catch (final InterruptedException e) {
                        // Interruption is the only way to stop this loop: restore the flag and leave
                        // instead of swallowing it and spinning forever.
                        s_logger.warn("Notification task interrupted while waiting for messages, stopping", e);
                        Thread.currentThread().interrupt();
                        return;
                    }
                }
                ClusterManagerMessage msg;
                while ((msg = getNextNotificationMessage()) != null) {
                    switch(msg.getMessageType()) {
                        case nodeAdded:
                            if (msg.getNodes() != null && msg.getNodes().size() > 0) {
                                final Profiler profiler = new Profiler();
                                profiler.start();
                                notifyNodeJoined(msg.getNodes());
                                profiler.stop();
                                logNotificationDuration("join", profiler.getDurationInMillis());
                            }
                            break;
                        case nodeRemoved:
                            if (msg.getNodes() != null && msg.getNodes().size() > 0) {
                                final Profiler profiler = new Profiler();
                                profiler.start();
                                notifyNodeLeft(msg.getNodes());
                                profiler.stop();
                                logNotificationDuration("leave", profiler.getDurationInMillis());
                            }
                            break;
                        case nodeIsolated:
                            notifyNodeIsolated();
                            break;
                        default:
                            // Unknown message types are a programming error; only checked when assertions are on.
                            assert false;
                            break;
                    }
                }
                try {
                    Thread.sleep(1000);
                } catch (final InterruptedException e) {
                    s_logger.warn("Notification task interrupted while sleeping, stopping", e);
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }

        /**
         * Reports how long a notification callback took: WARN when it exceeded one second,
         * DEBUG (if enabled) otherwise.
         *
         * @param event  short event name used in the message ("join" or "leave")
         * @param millis measured duration in milliseconds
         */
        private void logNotificationDuration(final String event, final long millis) {
            if (millis > 1000) {
                s_logger.warn("Notifying management server " + event + " event took " + millis + " ms");
            } else if (s_logger.isDebugEnabled()) {
                s_logger.debug("Notifying management server " + event + " event took " + millis + " ms");
            }
        }
    };
}
Also used : ManagedContextRunnable(com.cloud.managed.context.ManagedContextRunnable) Profiler(com.cloud.utils.Profiler)

Example 2 with ManagedContextRunnable

use of com.cloud.managed.context.ManagedContextRunnable in project cosmic by MissionCriticalCloud.

the class ClusterManagerImpl method onNotifyingClusterPdu.

/**
 * Endless loop that takes incoming cluster PDUs and processes each one on {@code _executor}.
 *
 * <p>A response PDU is matched to its pending request via the acknowledged sequence id; the request
 * receives the JSON payload and every thread waiting on it is notified. Any other PDU is passed to
 * {@code _dispatcher}; when the PDU is a request, the dispatch result (empty string if {@code null})
 * is queued as a response addressed back to the sender.
 */
private void onNotifyingClusterPdu() {
    for (;;) {
        // NOTE(review): 1000 is presumably a poll timeout in milliseconds - confirm against popIncomingClusterPdu.
        final ClusterServicePdu incoming = popIncomingClusterPdu(1000);
        if (incoming != null) {
            _executor.execute(new ManagedContextRunnable() {

                @Override
                protected void runInContext() {
                    if (incoming.getPduType() != ClusterServicePdu.PDU_TYPE_RESPONSE) {
                        // Dispatch always happens, even for PDUs that expect no answer.
                        final String dispatched = _dispatcher.dispatch(incoming);
                        final String payload = dispatched != null ? dispatched : "";
                        if (incoming.getPduType() == ClusterServicePdu.PDU_TYPE_REQUEST) {
                            // Swap source and destination so the reply travels back to the requester.
                            final ClusterServicePdu reply = new ClusterServicePdu();
                            reply.setPduType(ClusterServicePdu.PDU_TYPE_RESPONSE);
                            reply.setSourcePeer(incoming.getDestPeer());
                            reply.setDestPeer(incoming.getSourcePeer());
                            reply.setAckSequenceId(incoming.getSequenceId());
                            reply.setJsonPackage(payload);
                            addOutgoingClusterPdu(reply);
                        }
                        return;
                    }

                    final ClusterServiceRequestPdu pending = popRequestPdu(incoming.getAckSequenceId());
                    if (pending == null) {
                        s_logger.warn("Original request has already been cancelled. pdu: " + incoming.getJsonPackage());
                        return;
                    }
                    pending.setResponseResult(incoming.getJsonPackage());
                    // Wake up whoever is blocked on the request object waiting for this response.
                    synchronized (pending) {
                        pending.notifyAll();
                    }
                }
            });
        }
    }
}
Also used : ManagedContextRunnable(com.cloud.managed.context.ManagedContextRunnable)

Example 3 with ManagedContextRunnable

use of com.cloud.managed.context.ManagedContextRunnable in project cosmic by MissionCriticalCloud.

the class AsyncJobManagerImpl method getHeartbeatTask.

/**
 * Creates the heartbeat task of the async job manager.
 *
 * <p>Each run tries to take the global "AsyncJobManagerHeartbeat" lock; if the lock cannot be
 * obtained within {@code ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION} the run is skipped. While
 * holding the lock it executes dequeued sync-queue items and schedules jobs that have a pending
 * wakeup signal.
 *
 * @return the runnable performing one heartbeat pass
 */
private Runnable getHeartbeatTask() {
    return new ManagedContextRunnable() {

        @Override
        protected void runInContext() {
            final GlobalLock heartbeatLock = GlobalLock.getInternLock("AsyncJobManagerHeartbeat");
            try {
                if (!heartbeatLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION)) {
                    // Lock not obtained: skip this pass.
                    return;
                }
                try {
                    reallyRun();
                } finally {
                    heartbeatLock.unlock();
                }
            } finally {
                // The lock reference is released whether or not the lock itself was acquired.
                heartbeatLock.releaseRef();
            }
        }

        /**
         * Performs the actual heartbeat work; any failure is logged and never propagated.
         */
        protected void reallyRun() {
            try {
                final List<SyncQueueItemVO> queueItems = _queueMgr.dequeueFromAny(getMsid(), MAX_ONETIME_SCHEDULE_SIZE);
                if (queueItems != null && !queueItems.isEmpty()) {
                    for (final SyncQueueItemVO queueItem : queueItems) {
                        if (s_logger.isDebugEnabled()) {
                            s_logger.debug("Execute sync-queue item: " + queueItem.toString());
                        }
                        executeQueueItem(queueItem, false);
                    }
                }

                for (final Long wakeupJobId : wakeupScan()) {
                    // TODO, we assume that all jobs in this category is API job only
                    final AsyncJobVO wakeupJob = _jobDao.findById(wakeupJobId);
                    if (wakeupJob == null || (wakeupJob.getPendingSignals() & AsyncJob.Constants.SIGNAL_MASK_WAKEUP) == 0) {
                        continue;
                    }
                    scheduleExecution(wakeupJob, false);
                }
            } catch (final Throwable e) {
                s_logger.error("Unexpected exception when trying to execute queue item, ", e);
            }
        }
    };
}
Also used : GlobalLock(com.cloud.utils.db.GlobalLock) ManagedContextRunnable(com.cloud.managed.context.ManagedContextRunnable)

Example 4 with ManagedContextRunnable

use of com.cloud.managed.context.ManagedContextRunnable in project cosmic by MissionCriticalCloud.

the class ClusteredAgentManagerImpl method getTransferScanTask.

/**
 * Creates the periodic task that scans the hosts queued for transfer to another management server.
 *
 * <p>While holding the monitor of {@code _agentToTransferIds}, every queued host is examined:
 * <ul>
 *   <li>hosts without an active transfer record, hosts this node no longer owns, and hosts whose
 *       future owner is not in the Up state are dropped from the list and their transfer is marked
 *       complete;</li>
 *   <li>hosts whose attache has no queued requests and no non-recurring listeners are handed to a
 *       {@code RebalanceTask} on {@code _executor};</li>
 *   <li>all other hosts stay in the list and are looked at again on the next scan.</li>
 * </ul>
 *
 * <p>A host is removed from the list only after the executor has accepted its rebalance task, so a
 * rejected submission is retried on the next scan.
 *
 * @return the runnable performing one transfer scan
 */
private Runnable getTransferScanTask() {
    return new ManagedContextRunnable() {

        @Override
        protected void runInContext() {
            if (s_logger.isTraceEnabled()) {
                s_logger.trace("Clustered agent transfer scan check, management server id:" + _nodeId);
            }
            synchronized (_agentToTransferIds) {
                if (_agentToTransferIds.size() > 0) {
                    s_logger.debug("Found " + _agentToTransferIds.size() + " agents to transfer");
                    // An explicit iterator is used so entries can be removed while scanning.
                    for (final Iterator<Long> iterator = _agentToTransferIds.iterator(); iterator.hasNext(); ) {
                        final Long hostId = iterator.next();
                        final AgentAttache attache = findAttache(hostId);
                        // if the thread:
                        // 1) timed out waiting for the host to reconnect
                        // 2) recipient management server is not active any more
                        // 3) if the management server doesn't own the host any more
                        // remove the host from re-balance list and delete from op_host_transfer DB
                        // no need to do anything with the real attache as we haven't modified it yet
                        final Date cutTime = DateUtil.currentGMTTime();
                        final HostTransferMapVO transferMap = _hostTransferDao.findActiveHostTransferMapByHostId(hostId, new Date(cutTime.getTime() - rebalanceTimeOut));
                        if (transferMap == null) {
                            s_logger.debug("Timed out waiting for the host id=" + hostId + " to be ready to transfer, skipping rebalance for the host");
                            iterator.remove();
                            _hostTransferDao.completeAgentTransfer(hostId);
                            continue;
                        }
                        if (transferMap.getInitialOwner() != _nodeId || attache == null || attache.forForward()) {
                            s_logger.debug("Management server " + _nodeId + " doesn't own host id=" + hostId + " any more, skipping rebalance for the host");
                            iterator.remove();
                            _hostTransferDao.completeAgentTransfer(hostId);
                            continue;
                        }
                        final ManagementServerHostVO futureOwner = _mshostDao.findByMsid(transferMap.getFutureOwner());
                        final ManagementServerHostVO initialOwner = _mshostDao.findByMsid(transferMap.getInitialOwner());
                        // NOTE(review): a future owner that cannot be found (null) is not treated as inactive
                        // here and the transfer proceeds - confirm this is intended.
                        if (futureOwner != null && futureOwner.getState() != ManagementServerHost.State.Up) {
                            s_logger.debug("Can't transfer host " + hostId + " as it's future owner is not in UP state: " + futureOwner + ", skipping rebalance");
                            iterator.remove();
                            _hostTransferDao.completeAgentTransfer(hostId);
                            continue;
                        }
                        if (attache.getQueueSize() == 0 && attache.getNonRecurringListenersSize() == 0) {
                            try {
                                s_logger.debug("Transferring agent for host " + hostId + " from management server " + initialOwner + " " + "to " + futureOwner);
                                _executor.execute(new RebalanceTask(hostId, transferMap.getInitialOwner(), transferMap.getFutureOwner()));
                                // Forget the host only once the executor has accepted the task; removing it
                                // beforehand would silently drop the transfer when the submission is rejected.
                                iterator.remove();
                            } catch (final RejectedExecutionException ex) {
                                // The host stays in the list and is retried on the next scan.
                                s_logger.warn("Failed to submit rebalance task for host id=" + hostId + "; postponing the execution");
                            }
                        } else {
                            s_logger.debug("Agent " + hostId + " can't be transferred yet as its request queue size is " + attache.getQueueSize() + " and listener queue size is " + attache.getNonRecurringListenersSize());
                        }
                    }
                } else {
                    if (s_logger.isTraceEnabled()) {
                        s_logger.trace("Found no agents to be transferee by the management server " + _nodeId);
                    }
                }
            }
        }
    };
}
Also used : ManagedContextRunnable(com.cloud.managed.context.ManagedContextRunnable) ManagementServerHostVO(com.cloud.cluster.ManagementServerHostVO) HostTransferMapVO(com.cloud.cluster.agentlb.HostTransferMapVO) Date(java.util.Date) RejectedExecutionException(java.util.concurrent.RejectedExecutionException)

Example 5 with ManagedContextRunnable

use of com.cloud.managed.context.ManagedContextRunnable in project cosmic by MissionCriticalCloud.

the class AsyncJobManagerImpl method getExecutorRunnable.

/**
 * Creates the runnable that executes a single async job.
 *
 * <p>{@code run()} prepares the logging context (MDC keys "job" and "logcontextid") around the
 * managed-context execution and cleans it up afterwards. {@code runInContext()} registers the job
 * for monitoring, dispatches it (wakeup dispatcher when a wakeup signal is pending, regular
 * dispatcher otherwise), marks the job FAILED on any unexpected error and finally purges the sync
 * queue item and unregisters the monitoring hooks.
 *
 * @param job the job to execute
 * @return the runnable wrapping the job execution
 */
private Runnable getExecutorRunnable(final AsyncJob job) {
    return new ManagedContextRunnable() {

        @Override
        public void run() {
            // register place-holder context to avoid installing system account call context
            if (CallContext.current() == null) {
                CallContext.registerPlaceHolderContext();
            }
            // The executing thread may already carry a log context (e.g. a reused pool thread or a
            // caller invoking run() directly); remember it so it can be put back afterwards.
            final Object previousLogContext = MDC.get("logcontextid");
            try {
                // MDC setup lives inside the try so that a failure while resolving the related job
                // (e.g. a non-numeric id) cannot leave stale keys on the thread.
                final String related = job.getRelated();
                if (related != null && !related.isEmpty()) {
                    MDC.put("job", " (job: " + related + "/" + "job: " + job.getId() + ")");
                } else {
                    MDC.put("job", " (job: " + job.getId() + ")");
                }
                MDC.put("logcontextid", " (logid: " + resolveLogContext() + ")");
                super.run();
            } finally {
                MDC.remove("job");
                if (previousLogContext != null) {
                    MDC.put("logcontextid", previousLogContext.toString());
                } else {
                    MDC.remove("logcontextid");
                }
            }
        }

        /**
         * Determines the identifier log lines of this job are grouped under.
         *
         * @return the short UUID of the related job when one is set and can be found,
         *         otherwise the short UUID of this job
         */
        private String resolveLogContext() {
            final String related = job.getRelated();
            if (related != null && !related.isEmpty()) {
                final AsyncJob relatedJob = _jobDao.findByIdIncludingRemoved(Long.parseLong(related));
                if (relatedJob != null) {
                    return relatedJob.getShortUuid();
                }
            }
            return job.getShortUuid();
        }

        @Override
        protected void runInContext() {
            final long runNumber = getJobRunNumber();
            try {
                // JMX registration is best effort; the job runs regardless of the outcome.
                try {
                    JmxUtil.registerMBean("AsyncJobManager", "Active Job " + job.getId(), new AsyncJobMBeanImpl(job));
                } catch (final Exception e) {
                    // is expected to fail under situations
                    if (s_logger.isTraceEnabled()) {
                        s_logger.trace("Unable to register active job " + job.getId() + " to JMX monitoring due to exception " + ExceptionUtil.toString(e));
                    }
                }
                _jobMonitor.registerActiveTask(runNumber, job.getId());
                AsyncJobExecutionContext.setCurrentExecutionContext(new AsyncJobExecutionContext(job));
                // Set the log context again inside the managed context.
                // NOTE(review): presumably needed because entering the context may alter the MDC - confirm.
                MDC.put("logcontextid", " (logid: " + resolveLogContext() + ")");
                // execute the job
                if (s_logger.isDebugEnabled()) {
                    s_logger.debug("Executing " + StringUtils.cleanString(job.toString()));
                }
                if ((getAndResetPendingSignals(job) & AsyncJob.Constants.SIGNAL_MASK_WAKEUP) != 0) {
                    final AsyncJobDispatcher jobDispatcher = getWakeupDispatcher(job);
                    if (jobDispatcher != null) {
                        jobDispatcher.runJob(job);
                    } else {
                        // TODO, job wakeup is not in use yet
                        if (s_logger.isTraceEnabled()) {
                            s_logger.trace("Unable to find a wakeup dispatcher from the joined job: " + job);
                        }
                    }
                } else {
                    final AsyncJobDispatcher jobDispatcher = getDispatcher(job.getDispatcher());
                    if (jobDispatcher != null) {
                        jobDispatcher.runJob(job);
                    } else {
                        // Without a dispatcher the job can never run, so fail it right away.
                        s_logger.error("Unable to find job dispatcher, job will be cancelled");
                        final ExceptionResponse response = new ExceptionResponse();
                        response.setErrorCode(ApiErrorCode.INTERNAL_ERROR.getHttpCode());
                        response.setErrorText("Unable to find job dispatcher, job will be cancelled");
                        completeAsyncJob(job.getId(), JobInfo.Status.FAILED, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), JobSerializerHelper.toSerializedString(response));
                    }
                }
                if (s_logger.isDebugEnabled()) {
                    s_logger.debug("Done executing " + job.getCmd() + " for job-" + job.getId());
                }
            } catch (final Throwable e) {
                // Any failure escaping the dispatcher marks the job as failed with the root cause as error text.
                s_logger.error("Unexpected exception", e);
                final ExceptionResponse response = new ExceptionResponse();
                response.setErrorCode(ApiErrorCode.INTERNAL_ERROR.getHttpCode());
                response.setErrorText(ExceptionUtils.getRootCauseMessage(e));
                completeAsyncJob(job.getId(), JobInfo.Status.FAILED, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), JobSerializerHelper.toSerializedString(response));
            } finally {
                // guard final clause as well
                try {
                    if (job.getSyncSource() != null) {
                        // here check queue item one more time to double make sure that queue item is removed in case of any uncaught exception
                        _queueMgr.purgeItem(job.getSyncSource().getId());
                    }
                    try {
                        JmxUtil.unregisterMBean("AsyncJobManager", "Active Job " + job.getId());
                    } catch (final Exception e) {
                        // is expected to fail under situations
                        if (s_logger.isTraceEnabled()) {
                            s_logger.trace("Unable to unregister job " + job.getId() + " to JMX monitoring due to exception " + ExceptionUtil.toString(e));
                        }
                    }
                    // clean execution environment
                    AsyncJobExecutionContext.unregister();
                    _jobMonitor.unregisterActiveTask(runNumber);
                } catch (final Throwable e) {
                    s_logger.error("Double exception", e);
                }
            }
        }
    };
}
Also used : ManagedContextRunnable(com.cloud.managed.context.ManagedContextRunnable) ExceptionResponse(com.cloud.api.response.ExceptionResponse) AsyncJobExecutionContext(com.cloud.framework.jobs.AsyncJobExecutionContext) AsyncJob(com.cloud.framework.jobs.AsyncJob) ConfigurationException(javax.naming.ConfigurationException) CloudRuntimeException(com.cloud.utils.exception.CloudRuntimeException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) AsyncJobDispatcher(com.cloud.framework.jobs.AsyncJobDispatcher)

Aggregations

ManagedContextRunnable (com.cloud.managed.context.ManagedContextRunnable)10 Profiler (com.cloud.utils.Profiler)2 GlobalLock (com.cloud.utils.db.GlobalLock)2 CloudRuntimeException (com.cloud.utils.exception.CloudRuntimeException)2 Date (java.util.Date)2 RejectedExecutionException (java.util.concurrent.RejectedExecutionException)2 ExceptionResponse (com.cloud.api.response.ExceptionResponse)1 ManagementServerHostVO (com.cloud.cluster.ManagementServerHostVO)1 HostTransferMapVO (com.cloud.cluster.agentlb.HostTransferMapVO)1 CallContext (com.cloud.context.CallContext)1 AsyncJob (com.cloud.framework.jobs.AsyncJob)1 AsyncJobDispatcher (com.cloud.framework.jobs.AsyncJobDispatcher)1 AsyncJobExecutionContext (com.cloud.framework.jobs.AsyncJobExecutionContext)1 StoragePool (com.cloud.storage.StoragePool)1 DB (com.cloud.utils.db.DB)1 TransactionLegacy (com.cloud.utils.db.TransactionLegacy)1 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 Method (java.lang.reflect.Method)1 SQLException (java.sql.SQLException)1 ConfigurationException (javax.naming.ConfigurationException)1