Search in sources :

Example 31 with TaskDriver

Use of org.apache.helix.task.TaskDriver in the Apache Helix project.

The handleEvent method of the class GenericHelixController.

/**
 * Runs every pipeline registered for the event's type against the supplied cache, then reports
 * timing (non-task cache) or workflow/job status (task cache).
 *
 * lock-always: caller always needs to obtain an external lock before call, calls to handleEvent()
 * should be serialized
 *
 * <p>The event is dropped, with a log entry, when it carries no manager, when the manager is not
 * the leader, when the controller is paused, when the change context is of type FINALIZE, or when
 * no pipeline is registered for the event type.
 *
 * @param event the cluster event to process; the manager and change context are read from its
 *     attributes, and the cache (plus the status monitor when monitoring) is attached to it
 * @param cache the data cache handed to the pipelines; {@code cache.isTaskCache()} selects
 *     between the task pipeline registry and the default one
 */
protected void handleEvent(ClusterEvent event, ClusterDataCache cache) {
    HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
    if (manager == null) {
        logger.error("No cluster manager in event:" + event.getEventType());
        return;
    }
    if (!manager.isLeader()) {
        logger.error("Cluster manager: " + manager.getInstanceName() + " is not leader for " + manager.getClusterName() + ". Pipeline will not be invoked");
        return;
    }
    // NOTE: _paused is the in-memory flag. Per the original comment here, the controller may be
    // executing in un-paused mode even though that is not the config in ZK.
    if (_paused) {
        logger.info("Cluster " + manager.getClusterName() + " is paused. Ignoring the event:" + event.getEventType());
        return;
    }
    // A missing attribute simply yields null, so no separate presence check is needed.
    NotificationContext context = event.getAttribute(AttributeName.changeContext.name());
    if (context != null) {
        if (context.getType() == Type.FINALIZE) {
            stopRebalancingTimers();
            logger.info("Get FINALIZE notification, skip the pipeline. Event :" + event.getEventType());
            return;
        } else {
            // TODO: should be in the initialization of controller.
            if (_cache != null) {
                checkRebalancingTimer(manager, Collections.EMPTY_LIST, _cache.getClusterConfig());
            }
            if (_isMonitoring) {
                event.addAttribute(AttributeName.clusterStatusMonitor.name(), _clusterStatusMonitor);
            }
        }
    }
    // add the cache
    event.addAttribute(AttributeName.ClusterDataCache.name(), cache);
    final boolean taskCache = cache.isTaskCache();
    List<Pipeline> pipelines = taskCache ? _taskRegistry.getPipelinesForEvent(event.getEventType()) : _registry.getPipelinesForEvent(event.getEventType());
    if (pipelines == null || pipelines.isEmpty()) {
        logger.info("No " + getPipelineType(taskCache) + " pipeline to run for event:" + event.getEventType());
        return;
    }
    // Argument order must follow the placeholders: pipeline type, cluster name, event type.
    logger.info(String.format("START: Invoking %s controller pipeline for cluster %s event: %s", getPipelineType(taskCache), manager.getClusterName(), event.getEventType()));
    long startTime = System.currentTimeMillis();
    boolean rebalanceFail = false;
    for (Pipeline pipeline : pipelines) {
        try {
            pipeline.handle(event);
            pipeline.finish();
        } catch (Exception e) {
            logger.error("Exception while executing " + getPipelineType(taskCache) + " pipeline: " + pipeline + " for cluster " + _clusterName + ". Will not continue to next pipeline", e);
            if (e instanceof HelixMetaDataAccessException) {
                rebalanceFail = true;
                // If pipeline failed due to read/write fails to zookeeper, retry the pipeline.
                cache.requireFullRefresh();
                logger.warn("Rebalance pipeline failed due to read failure from zookeeper, cluster: " + _clusterName);
                // only push a retry event when there is no pending event in the corresponding event queue.
                if (isEventQueueEmpty(taskCache)) {
                    _continousRebalanceFailureCount++;
                    long delay = getRetryDelay(_continousRebalanceFailureCount);
                    if (delay == 0) {
                        forceRebalance(manager, ClusterEventType.RetryRebalance);
                    } else {
                        _asyncTasksThreadPool.schedule(new RebalanceTask(manager, ClusterEventType.RetryRebalance), delay, TimeUnit.MILLISECONDS);
                    }
                    logger.info("Retry rebalance pipeline with delay " + delay + "ms for cluster: " + _clusterName);
                }
            }
            // NOTE(review): unlike the other monitor calls in this method, this one is not guarded
            // by _isMonitoring; confirm _clusterStatusMonitor can never be null here.
            _clusterStatusMonitor.reportRebalanceFailure();
            // Remaining pipelines are skipped after the first failure.
            break;
        }
    }
    // Only metadata-access failures count towards the consecutive-failure counter; any other
    // outcome (success or a different exception) resets it.
    if (!rebalanceFail) {
        _continousRebalanceFailureCount = 0;
    }
    long endTime = System.currentTimeMillis();
    logger.info(String.format("END: Invoking %s controller pipeline for event: %s for cluster %s, took %d ms", getPipelineType(taskCache), event.getEventType(), manager.getClusterName(), (endTime - startTime)));
    if (!taskCache) {
        // report event process durations
        NotificationContext notificationContext = event.getAttribute(AttributeName.changeContext.name());
        long enqueueTime = event.getCreationTime();
        // Plain appends are used instead of String.format on a pre-concatenated string, so a
        // '%' in the interpolated text can never be misread as a format specifier.
        StringBuilder sb = new StringBuilder();
        if (notificationContext != null) {
            long zkCallbackTime = notificationContext.getCreationTime();
            if (_isMonitoring) {
                _clusterStatusMonitor.updateClusterEventDuration(ClusterEventMonitor.PhaseName.Callback.name(), enqueueTime - zkCallbackTime);
            }
            sb.append("Callback time for event: ").append(event.getEventType()).append(" took: ").append(enqueueTime - zkCallbackTime).append(" ms\n");
        }
        if (_isMonitoring) {
            _clusterStatusMonitor.updateClusterEventDuration(ClusterEventMonitor.PhaseName.InQueue.name(), startTime - enqueueTime);
            _clusterStatusMonitor.updateClusterEventDuration(ClusterEventMonitor.PhaseName.TotalProcessed.name(), endTime - startTime);
        }
        sb.append("InQueue time for event: ").append(event.getEventType()).append(" took: ").append(startTime - enqueueTime).append(" ms\n");
        sb.append("TotalProcessed time for event: ").append(event.getEventType()).append(" took: ").append(endTime - startTime).append(" ms");
        logger.info(sb.toString());
    } else if (_isMonitoring) {
        // report workflow status
        TaskDriver driver = new TaskDriver(manager);
        _clusterStatusMonitor.refreshWorkflowsStatus(driver);
        _clusterStatusMonitor.refreshJobsStatus(driver);
    }
    // If event handling happens before controller deactivate, the process may write unnecessary
    // MBeans to monitoring after the monitor is disabled.
    // So reset ClusterStatusMonitor according to its status after all event handling.
    // TODO remove this once clusterStatusMonitor blocks any MBean register on isMonitoring = false.
    resetClusterStatusMonitor();
}
Also used : NotificationContext(org.apache.helix.NotificationContext) HelixManager(org.apache.helix.HelixManager) HelixMetaDataAccessException(org.apache.helix.api.exceptions.HelixMetaDataAccessException) TaskDriver(org.apache.helix.task.TaskDriver) ZkInterruptedException(org.I0Itec.zkclient.exception.ZkInterruptedException) HelixMetaDataAccessException(org.apache.helix.api.exceptions.HelixMetaDataAccessException) Pipeline(org.apache.helix.controller.pipeline.Pipeline)

Aggregations

TaskDriver (org.apache.helix.task.TaskDriver)31 WorkflowConfig (org.apache.helix.task.WorkflowConfig)11 Path (javax.ws.rs.Path)9 GET (javax.ws.rs.GET)8 IOException (java.io.IOException)6 HashMap (java.util.HashMap)6 HelixException (org.apache.helix.HelixException)6 ZkClient (org.apache.helix.manager.zk.ZkClient)6 JobConfig (org.apache.helix.task.JobConfig)6 Workflow (org.apache.helix.task.Workflow)6 Test (org.testng.annotations.Test)6 ZNRecord (org.apache.helix.ZNRecord)5 JobQueue (org.apache.helix.task.JobQueue)4 WorkflowContext (org.apache.helix.task.WorkflowContext)4 ObjectNode (org.codehaus.jackson.node.ObjectNode)4 ArrayList (java.util.ArrayList)3 Map (java.util.Map)3 Entity (javax.ws.rs.client.Entity)3 HelixManager (org.apache.helix.HelixManager)3 ClusterControllerManager (org.apache.helix.integration.manager.ClusterControllerManager)3