Use of org.apache.helix.task.TaskDriver in project helix by apache.
The class GenericHelixController, method handleEvent.
/**
 * Runs the controller pipelines registered for the given event against the given cache.
 *
 * <p>The event is dropped (with a log line) when it carries no cluster manager, when the manager
 * is not the leader, when the controller is paused, or when the attached notification context is
 * of type FINALIZE. Otherwise the cache is attached to the event and every pipeline registered
 * for the event type is executed in order; the first pipeline that throws stops the run. A
 * {@code HelixMetaDataAccessException} additionally marks the cache for a full refresh and, if
 * the corresponding event queue is empty, schedules a {@code RetryRebalance} event.
 *
 * <p>lock-always: caller always needs to obtain an external lock before call, calls to
 * handleEvent() should be serialized
 *
 * @param event the cluster event to process; the cache (and, when monitoring, the cluster status
 *          monitor) is added to it as an attribute
 * @param cache the cluster data cache handed to the pipelines; {@code cache.isTaskCache()}
 *          selects between the task pipeline registry and the default pipeline registry
 */
protected void handleEvent(ClusterEvent event, ClusterDataCache cache) {
  HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
  if (manager == null) {
    logger.error("No cluster manager in event:" + event.getEventType());
    return;
  }
  if (!manager.isLeader()) {
    logger.error("Cluster manager: " + manager.getInstanceName() + " is not leader for "
        + manager.getClusterName() + ". Pipeline will not be invoked");
    return;
  }
  // The in-memory pause flag decides whether to run; it might differ from the config in ZK.
  if (_paused) {
    logger.info("Cluster " + manager.getClusterName() + " is paused. Ignoring the event:"
        + event.getEventType());
    return;
  }

  // A missing attribute simply yields null, so a single lookup is sufficient.
  NotificationContext context = event.getAttribute(AttributeName.changeContext.name());
  if (context != null) {
    if (context.getType() == Type.FINALIZE) {
      stopRebalancingTimers();
      logger.info(
          "Get FINALIZE notification, skip the pipeline. Event :" + event.getEventType());
      return;
    }
    // TODO: should be in the initialization of controller.
    if (_cache != null) {
      checkRebalancingTimer(manager, Collections.EMPTY_LIST, _cache.getClusterConfig());
    }
    if (_isMonitoring) {
      event.addAttribute(AttributeName.clusterStatusMonitor.name(), _clusterStatusMonitor);
    }
  }

  // add the cache
  event.addAttribute(AttributeName.ClusterDataCache.name(), cache);

  boolean isTaskCache = cache.isTaskCache();
  String pipelineType = getPipelineType(isTaskCache);
  List<Pipeline> pipelines = isTaskCache
      ? _taskRegistry.getPipelinesForEvent(event.getEventType())
      : _registry.getPipelinesForEvent(event.getEventType());
  if (pipelines == null || pipelines.isEmpty()) {
    logger.info("No " + pipelineType + " pipeline to run for event:" + event.getEventType());
    return;
  }

  // Argument order must follow the placeholders: pipeline type, cluster name, event type.
  logger.info(String.format("START: Invoking %s controller pipeline for cluster %s event: %s",
      pipelineType, manager.getClusterName(), event.getEventType()));
  long startTime = System.currentTimeMillis();
  boolean rebalanceFail = false;
  for (Pipeline pipeline : pipelines) {
    try {
      pipeline.handle(event);
      pipeline.finish();
    } catch (Exception e) {
      logger.error("Exception while executing " + pipelineType + "pipeline: " + pipeline
          + "for cluster ." + _clusterName + ". Will not continue to next pipeline", e);
      if (e instanceof HelixMetaDataAccessException) {
        rebalanceFail = true;
        // If pipeline failed due to read/write fails to zookeeper, retry the pipeline.
        cache.requireFullRefresh();
        logger.warn("Rebalance pipeline failed due to read failure from zookeeper, cluster: "
            + _clusterName);
        // only push a retry event when there is no pending event in the corresponding event
        // queue.
        if (isEventQueueEmpty(isTaskCache)) {
          _continousRebalanceFailureCount++;
          long delay = getRetryDelay(_continousRebalanceFailureCount);
          if (delay == 0) {
            forceRebalance(manager, ClusterEventType.RetryRebalance);
          } else {
            _asyncTasksThreadPool.schedule(
                new RebalanceTask(manager, ClusterEventType.RetryRebalance), delay,
                TimeUnit.MILLISECONDS);
          }
          logger.info("Retry rebalance pipeline with delay " + delay + "ms for cluster: "
              + _clusterName);
        }
      }
      // NOTE(review): unlike the other monitor calls in this method, this one is not guarded
      // by _isMonitoring -- confirm that is intended.
      _clusterStatusMonitor.reportRebalanceFailure();
      // The first failing pipeline aborts the remaining ones.
      break;
    }
  }
  // Only metadata-access failures count towards the continuous failure counter.
  if (!rebalanceFail) {
    _continousRebalanceFailureCount = 0;
  }
  long endTime = System.currentTimeMillis();
  logger.info(String.format(
      "END: Invoking %s controller pipeline for event: %s for cluster %s, took %d ms",
      pipelineType, event.getEventType(), manager.getClusterName(), (endTime - startTime)));

  if (!isTaskCache) {
    // report event process durations
    NotificationContext notificationContext =
        event.getAttribute(AttributeName.changeContext.name());
    long enqueueTime = event.getCreationTime();
    StringBuilder sb = new StringBuilder();
    if (notificationContext != null) {
      long zkCallbackTime = notificationContext.getCreationTime();
      if (_isMonitoring) {
        _clusterStatusMonitor.updateClusterEventDuration(
            ClusterEventMonitor.PhaseName.Callback.name(), enqueueTime - zkCallbackTime);
      }
      // Plain appends: the text is already fully built, so it must not be used as a format
      // string.
      sb.append("Callback time for event: ").append(event.getEventType()).append(" took: ")
          .append(enqueueTime - zkCallbackTime).append(" ms\n");
    }
    if (_isMonitoring) {
      _clusterStatusMonitor.updateClusterEventDuration(
          ClusterEventMonitor.PhaseName.InQueue.name(), startTime - enqueueTime);
      _clusterStatusMonitor.updateClusterEventDuration(
          ClusterEventMonitor.PhaseName.TotalProcessed.name(), endTime - startTime);
    }
    sb.append("InQueue time for event: ").append(event.getEventType()).append(" took: ")
        .append(startTime - enqueueTime).append(" ms\n");
    sb.append("TotalProcessed time for event: ").append(event.getEventType())
        .append(" took: ").append(endTime - startTime).append(" ms");
    logger.info(sb.toString());
  } else if (_isMonitoring) {
    // report workflow status
    TaskDriver driver = new TaskDriver(manager);
    _clusterStatusMonitor.refreshWorkflowsStatus(driver);
    _clusterStatusMonitor.refreshJobsStatus(driver);
  }

  // If event handling happens before controller deactivate, the process may write unnecessary
  // MBeans to monitoring after the monitor is disabled.
  // So reset ClusterStatusMonitor according to its status after all event handling.
  // TODO remove this once clusterStatusMonitor blocks any MBean register on isMonitoring = false.
  resetClusterStatusMonitor();
}
Aggregations