Search in sources :

Example 1 with ClusterStatusMonitor

use of org.apache.helix.monitoring.mbeans.ClusterStatusMonitor in project helix by apache.

the class ReadClusterDataStage method process.

/**
 * Refreshes the cluster data cache and publishes it on the event.
 *
 * <p>The cache is taken from the event when present; otherwise the cache kept in
 * {@code _cache} from an earlier invocation is reused, and only when neither exists is a
 * new {@link ClusterDataCache} created for the event's cluster. For non-task caches an
 * asynchronous job is also submitted that recomputes the instance-level inputs
 * (all / live / disabled instances, disabled partitions, tags) and hands them to the
 * {@link ClusterStatusMonitor} found on the event, if any.
 *
 * @param event the cluster event carrying the HelixManager and, optionally, the cache and
 *              the cluster status monitor
 * @throws StageException if the event has no HelixManager attribute
 */
@Override
public void process(ClusterEvent event) throws Exception {
    HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
    if (manager == null) {
        throw new StageException("HelixManager attribute value is null");
    }
    ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());
    if (cache == null) {
        // The event did not bring a cache. Reuse the one remembered from a previous run
        // instead of overwriting the field with null (which made the refresh below throw
        // a NullPointerException); create a fresh cache only if there is none at all.
        cache = (_cache != null) ? _cache : new ClusterDataCache(event.getClusterName());
    }
    _cache = cache;
    HelixDataAccessor dataAccessor = manager.getHelixDataAccessor();
    _cache.refresh(dataAccessor);
    final ClusterConfig clusterConfig = cache.getClusterConfig();
    if (!_cache.isTaskCache()) {
        final ClusterStatusMonitor clusterStatusMonitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
        asyncExecute(_cache.getAsyncTasksThreadPool(), new Callable<Object>() {

            @Override
            public Object call() {
                // Update the cluster status gauges
                if (clusterStatusMonitor != null) {
                    logger.debug("Update cluster status monitors");
                    Set<String> instanceSet = Sets.newHashSet();
                    Set<String> liveInstanceSet = Sets.newHashSet();
                    Set<String> disabledInstanceSet = Sets.newHashSet();
                    Map<String, Map<String, List<String>>> disabledPartitions = Maps.newHashMap();
                    Map<String, List<String>> oldDisabledPartitions = Maps.newHashMap();
                    Map<String, Set<String>> tags = Maps.newHashMap();
                    Map<String, LiveInstance> liveInstanceMap = _cache.getLiveInstances();
                    for (Map.Entry<String, InstanceConfig> e : _cache.getInstanceConfigMap().entrySet()) {
                        String instanceName = e.getKey();
                        InstanceConfig config = e.getValue();
                        instanceSet.add(instanceName);
                        if (liveInstanceMap.containsKey(instanceName)) {
                            liveInstanceSet.add(instanceName);
                        }
                        // An instance counts as disabled when its own config says so or when
                        // the cluster config lists it among the disabled instances.
                        if (!config.getInstanceEnabled() || (clusterConfig.getDisabledInstances() != null && clusterConfig.getDisabledInstances().containsKey(instanceName))) {
                            disabledInstanceSet.add(instanceName);
                        }
                        // TODO : Get rid of this data structure once the API is removed.
                        oldDisabledPartitions.put(instanceName, config.getDisabledPartitions());
                        disabledPartitions.put(instanceName, config.getDisabledPartitionsMap());
                        Set<String> instanceTags = Sets.newHashSet(config.getTags());
                        tags.put(instanceName, instanceTags);
                    }
                    clusterStatusMonitor.setClusterInstanceStatus(liveInstanceSet, instanceSet, disabledInstanceSet, disabledPartitions, oldDisabledPartitions, tags);
                    logger.debug("Complete cluster status monitors update.");
                }
                return null;
            }
        });
    }
    event.addAttribute(AttributeName.ClusterDataCache.name(), _cache);
}
Also used : HelixManager(org.apache.helix.HelixManager) Set(java.util.Set) StageException(org.apache.helix.controller.pipeline.StageException) ClusterStatusMonitor(org.apache.helix.monitoring.mbeans.ClusterStatusMonitor) HelixDataAccessor(org.apache.helix.HelixDataAccessor) InstanceConfig(org.apache.helix.model.InstanceConfig) List(java.util.List) Map(java.util.Map) ClusterConfig(org.apache.helix.model.ClusterConfig)

Example 2 with ClusterStatusMonitor

use of org.apache.helix.monitoring.mbeans.ClusterStatusMonitor in project helix by apache.

the class ExternalViewComputeStage method process.

/**
 * Computes the external view of every resource in the event and synchronises the result
 * with the data accessor and the cluster data cache.
 *
 * <p>For each resource a new {@link ExternalView} is built from the current state output.
 * Views that differ from the cached one are collected and written with
 * {@code dataAccessor.setChildren}; views whose ideal state has external views disabled,
 * and cached views whose resource is no longer in the resource map, are removed instead.
 * For non-task caches the per-resource metrics on the {@link ClusterStatusMonitor}
 * (when one is attached to the event) are updated or unregistered along the way.
 *
 * @param event the cluster event; must carry the HelixManager, RESOURCES and
 *              ClusterDataCache attributes
 * @throws StageException if the HelixManager, resource map or cache attribute is missing
 */
@Override
public void process(ClusterEvent event) throws Exception {
    HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
    Map<String, Resource> resourceMap = event.getAttribute(AttributeName.RESOURCES.name());
    ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());
    if (manager == null || resourceMap == null || cache == null) {
        throw new StageException("Missing attributes in event:" + event + ". Requires ClusterManager|RESOURCES|DataCache");
    }
    HelixDataAccessor dataAccessor = manager.getHelixDataAccessor();
    PropertyKey.Builder keyBuilder = dataAccessor.keyBuilder();
    // NOTE(review): CURRENT_STATE is not part of the null check above, yet it is
    // dereferenced below - confirm an earlier stage always sets it.
    CurrentStateOutput currentStateOutput = event.getAttribute(AttributeName.CURRENT_STATE.name());
    // Views that changed and still have to be written.
    List<ExternalView> newExtViews = new ArrayList<>();
    Map<String, ExternalView> curExtViews = cache.getExternalViews();
    for (String resourceName : resourceMap.keySet()) {
        ExternalView view = new ExternalView(resourceName);
        // Use the bucket size carried by the resource when it is positive; otherwise
        // fall back to the bucket size recorded in the current state output.
        Resource resource = resourceMap.get(resourceName);
        if (resource.getBucketSize() > 0) {
            view.setBucketSize(resource.getBucketSize());
        } else {
            view.setBucketSize(currentStateOutput.getBucketSize(resourceName));
        }
        // Copy the current state of every partition replica into the new view.
        for (Partition partition : resource.getPartitions()) {
            Map<String, String> currentStateMap = currentStateOutput.getCurrentStateMap(resourceName, partition);
            if (currentStateMap != null && currentStateMap.size() > 0) {
                // Every instance reporting a state is included; no filtering on
                // disabled instances is applied here.
                for (String instance : currentStateMap.keySet()) {
                    view.setState(partition.getPartitionName(), instance, currentStateMap.get(instance));
                }
            }
        }
        // Update cluster status monitor mbean
        IdealState idealState = cache.getIdealState(resourceName);
        if (!cache.isTaskCache()) {
            ResourceConfig resourceConfig = cache.getResourceConfig(resourceName);
            ClusterStatusMonitor clusterStatusMonitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
            if (clusterStatusMonitor != null) {
                if (idealState != null && (resourceConfig == null || !resourceConfig.isMonitoringDisabled())) {
                    // Resources using the scheduler task queue state model are not reported.
                    if (!idealState.getStateModelDefRef().equalsIgnoreCase(DefaultSchedulerMessageHandlerFactory.SCHEDULER_TASK_QUEUE)) {
                        StateModelDefinition stateModelDef = cache.getStateModelDef(idealState.getStateModelDefRef());
                        clusterStatusMonitor.setResourceStatus(view, cache.getIdealState(view.getResourceName()), stateModelDef);
                    }
                } else {
                    // Drop the metrics if the resource is dropped, or the MonitorDisabled is changed to true.
                    clusterStatusMonitor.unregisterResource(view.getResourceName());
                }
            }
        }
        ExternalView curExtView = curExtViews.get(resourceName);
        // Copy the simple fields from the ideal state; if the ideal state has been deleted,
        // copy them from the existing external view instead.
        if (idealState != null) {
            view.getRecord().getSimpleFields().putAll(idealState.getRecord().getSimpleFields());
        } else if (curExtView != null) {
            view.getRecord().getSimpleFields().putAll(curExtView.getRecord().getSimpleFields());
        }
        // Compare the new external view with the current one and keep it only when different.
        if (curExtView == null || !curExtView.getRecord().equals(view.getRecord())) {
            // Add external view to the list which will be written to ZK later.
            newExtViews.add(view);
            // For scheduler task queue resources, additionally propagate the changed view
            // through updateScheduledTaskStatus.
            if (idealState != null && idealState.getStateModelDefRef().equalsIgnoreCase(DefaultSchedulerMessageHandlerFactory.SCHEDULER_TASK_QUEUE)) {
                updateScheduledTaskStatus(view, manager, idealState);
            }
        }
    }
    // Names of external views removed from the data accessor; purged from the cache at the end.
    List<String> externalviewsToRemove = new ArrayList<>();
    // TODO: consider not setting the externalview of SCHEDULER_TASK_QUEUE at all.
    // Are there any entity that will be interested in its change?
    // For the resource with DisableExternalView option turned on in IdealState
    // We will not actually create or write the externalView to ZooKeeper.
    List<PropertyKey> keys = new ArrayList<>();
    // keys is built in step with the views that remain in newExtViews so that both
    // lists line up for the setChildren call below.
    for (Iterator<ExternalView> it = newExtViews.iterator(); it.hasNext(); ) {
        ExternalView view = it.next();
        String resourceName = view.getResourceName();
        IdealState idealState = cache.getIdealState(resourceName);
        if (idealState != null && idealState.isExternalViewDisabled()) {
            it.remove();
            // remove the external view if the external view exists
            if (curExtViews.containsKey(resourceName)) {
                LOG.info("Remove externalView for resource: " + resourceName);
                dataAccessor.removeProperty(keyBuilder.externalView(resourceName));
                externalviewsToRemove.add(resourceName);
            }
        } else {
            keys.add(keyBuilder.externalView(resourceName));
        }
    }
    // add/update external-views
    if (newExtViews.size() > 0) {
        dataAccessor.setChildren(keys, newExtViews);
        cache.updateExternalViews(newExtViews);
    }
    // Remove external views whose resource is no longer present in the resource map.
    for (String resourceName : curExtViews.keySet()) {
        if (!resourceMap.keySet().contains(resourceName)) {
            LOG.info("Remove externalView for resource: " + resourceName);
            dataAccessor.removeProperty(keyBuilder.externalView(resourceName));
            externalviewsToRemove.add(resourceName);
        }
    }
    cache.removeExternalViews(externalviewsToRemove);
}
Also used : StageException(org.apache.helix.controller.pipeline.StageException) Builder(org.apache.helix.PropertyKey.Builder) ClusterStatusMonitor(org.apache.helix.monitoring.mbeans.ClusterStatusMonitor)

Example 3 with ClusterStatusMonitor

use of org.apache.helix.monitoring.mbeans.ClusterStatusMonitor in project helix by apache.

the class Pipeline method handle.

/**
 * Runs every configured stage against the given event, in order.
 *
 * <p>Each stage goes through {@code preProcess}, {@code process} and {@code postProcess}.
 * The wall-clock time of the three calls is logged and, when the event carries a
 * {@link ClusterStatusMonitor}, reported to it under the stage's name. Nothing happens
 * when no stages are configured.
 *
 * @param event the cluster event passed to every stage
 * @throws Exception whatever a stage throws; remaining stages are then not run
 */
public void handle(ClusterEvent event) throws Exception {
    if (_stages != null) {
        for (Stage current : _stages) {
            final long begin = System.currentTimeMillis();
            current.preProcess();
            current.process(event);
            current.postProcess();
            final long elapsedMs = System.currentTimeMillis() - begin;

            String summary = String.format("END %s for %s pipeline for cluster %s. took: %d ms ", current.getStageName(), _pipelineType, event.getClusterName(), elapsedMs);
            logger.info(summary);

            // Looked up per stage: the attribute is read from the event after the stage ran.
            ClusterStatusMonitor monitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
            if (monitor == null) {
                continue;
            }
            monitor.updateClusterEventDuration(current.getStageName(), elapsedMs);
        }
    }
}
Also used : ClusterStatusMonitor(org.apache.helix.monitoring.mbeans.ClusterStatusMonitor)

Example 4 with ClusterStatusMonitor

use of org.apache.helix.monitoring.mbeans.ClusterStatusMonitor in project helix by apache.

the class BestPossibleStateCalcStage method process.

/**
 * Computes the best possible state for the resources to rebalance and stores it on the
 * event under {@code BEST_POSSIBLE_STATE}.
 *
 * <p>Before computing, the active task counts on the cache are reset and the
 * offline-instance limit is validated. For non-task caches an asynchronous job is then
 * submitted that pushes the per-instance resource status to the cluster status monitor,
 * if the event carries one.
 *
 * @param event the cluster event; must carry CURRENT_STATE, RESOURCES_TO_REBALANCE and
 *              ClusterDataCache attributes
 * @throws StageException if any of the required attributes is missing
 */
@Override
public void process(ClusterEvent event) throws Exception {
    final ClusterStatusMonitor monitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
    final Map<String, Resource> resourcesToRebalance = event.getAttribute(AttributeName.RESOURCES_TO_REBALANCE.name());
    CurrentStateOutput currentStates = event.getAttribute(AttributeName.CURRENT_STATE.name());
    ClusterDataCache dataCache = event.getAttribute(AttributeName.ClusterDataCache.name());

    boolean attributeMissing = currentStates == null || resourcesToRebalance == null || dataCache == null;
    if (attributeMissing) {
        throw new StageException("Missing attributes in event:" + event + ". Requires CURRENT_STATE|RESOURCES|DataCache");
    }

    // Reset current INIT/RUNNING tasks on participants for throttling
    dataCache.resetActiveTaskCount(currentStates);

    // Check whether the offline/disabled instance count in the cluster reaches the set limit,
    // if yes, pause the rebalancer.
    HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
    validateOfflineInstancesLimit(dataCache, manager, monitor);

    final BestPossibleStateOutput bestPossible = compute(event, resourcesToRebalance, currentStates);
    event.addAttribute(AttributeName.BEST_POSSIBLE_STATE.name(), bestPossible);

    if (dataCache.isTaskCache()) {
        // Task caches do not report per-instance resource metrics.
        return;
    }

    final Map<String, InstanceConfig> configsByInstance = dataCache.getInstanceConfigMap();
    final Map<String, StateModelDefinition> stateModelDefs = dataCache.getStateModelDefMap();
    asyncExecute(dataCache.getAsyncTasksThreadPool(), new Callable<Object>() {

        @Override
        public Object call() {
            if (monitor == null) {
                return null;
            }
            try {
                monitor.setPerInstanceResourceStatus(bestPossible, configsByInstance, resourcesToRebalance, stateModelDefs);
            } catch (Exception ex) {
                // Metric reporting is best effort; a failure is logged, not propagated.
                logger.error("Could not update cluster status metrics!", ex);
            }
            return null;
        }
    });
}
Also used : StageException(org.apache.helix.controller.pipeline.StageException) ClusterStatusMonitor(org.apache.helix.monitoring.mbeans.ClusterStatusMonitor)

Example 5 with ClusterStatusMonitor

use of org.apache.helix.monitoring.mbeans.ClusterStatusMonitor in project helix by apache.

the class BestPossibleStateCalcStage method compute.

/**
 * Computes the best possible state of every resource in {@code resourceMap}.
 *
 * <p>Resources are wrapped in {@link ResourcePriority} and processed in the order defined
 * by that class's natural ordering. Resources whose computation fails are collected, a
 * warning is logged for each, and for non-task caches the overall rebalance status is
 * reported through {@code updateRebalanceStatus}.
 *
 * @param event              the cluster event supplying the cache, the HelixManager and the
 *                           cluster status monitor
 * @param resourceMap        the resources to compute, keyed by resource name
 * @param currentStateOutput the current state used as input for the computation
 * @return the output holding the states computed for the resources
 */
private BestPossibleStateOutput compute(ClusterEvent event, Map<String, Resource> resourceMap, CurrentStateOutput currentStateOutput) {
    ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());
    BestPossibleStateOutput output = new BestPossibleStateOutput();
    PriorityQueue<ResourcePriority> resourcePriorityQueue = new PriorityQueue<>();
    // The task driver is only available when the event carries a HelixManager.
    TaskDriver taskDriver = null;
    HelixManager helixManager = event.getAttribute(AttributeName.helixmanager.name());
    if (helixManager != null) {
        taskDriver = new TaskDriver(helixManager);
    }
    for (Resource resource : resourceMap.values()) {
        resourcePriorityQueue.add(new ResourcePriority(resource, cache.getIdealState(resource.getResourceName()), taskDriver));
    }
    final List<String> failureResources = new ArrayList<>();
    // Drain the queue with poll(): PriorityQueue.iterator() makes no ordering guarantee,
    // so iterating it would not visit the resources in priority order.
    ResourcePriority next;
    while ((next = resourcePriorityQueue.poll()) != null) {
        Resource resource = next.getResource();
        if (!computeResourceBestPossibleState(event, cache, currentStateOutput, resource, output)) {
            failureResources.add(resource.getResourceName());
            logger.warn("Failed to calculate best possible states for " + resource.getResourceName());
        }
    }
    // Check and report if resource rebalance has failure
    if (!cache.isTaskCache()) {
        ClusterStatusMonitor clusterStatusMonitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
        updateRebalanceStatus(!failureResources.isEmpty(), helixManager, cache, clusterStatusMonitor, "Failed to calculate best possible states for " + failureResources.size() + " resources.");
    }
    return output;
}
Also used : HelixManager(org.apache.helix.HelixManager) ClusterStatusMonitor(org.apache.helix.monitoring.mbeans.ClusterStatusMonitor)

Aggregations

ClusterStatusMonitor (org.apache.helix.monitoring.mbeans.ClusterStatusMonitor)8 StageException (org.apache.helix.controller.pipeline.StageException)5 HelixManager (org.apache.helix.HelixManager)3 HelixDataAccessor (org.apache.helix.HelixDataAccessor)2 LiveInstance (org.apache.helix.model.LiveInstance)2 Message (org.apache.helix.model.Message)2 Resource (org.apache.helix.model.Resource)2 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 Builder (org.apache.helix.PropertyKey.Builder)1 PartitionStateMap (org.apache.helix.controller.common.PartitionStateMap)1 ClusterConfig (org.apache.helix.model.ClusterConfig)1 CurrentState (org.apache.helix.model.CurrentState)1 InstanceConfig (org.apache.helix.model.InstanceConfig)1 Partition (org.apache.helix.model.Partition)1