use of org.apache.helix.monitoring.mbeans.ClusterStatusMonitor in project helix by apache.
the class ReadClusterDataStage method process.
/**
 * Refreshes the cluster data cache through the manager's data accessor, publishes the cache on
 * the event and, for non-task caches, schedules an asynchronous update of the cluster status
 * monitor gauges.
 *
 * <p>Cache selection: the cache attached to the event is used when present; otherwise the cache
 * this stage already holds is reused; only when neither exists is a new one created.
 *
 * @param event the cluster event carrying the HelixManager and, optionally, a ClusterDataCache
 *              and a ClusterStatusMonitor
 * @throws StageException if the event carries no HelixManager
 * @throws Exception propagated from the cache refresh
 */
@Override
public void process(ClusterEvent event) throws Exception {
  HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
  if (manager == null) {
    throw new StageException("HelixManager attribute value is null");
  }

  ClusterDataCache eventCache = event.getAttribute(AttributeName.ClusterDataCache.name());
  if (eventCache == null) {
    // Previously a missing event cache combined with an existing _cache overwrote _cache with
    // null and failed with a NullPointerException on refresh. Reuse the held cache instead.
    eventCache = (_cache != null) ? _cache : new ClusterDataCache(event.getClusterName());
  }
  // Single final reference shared by this method and the async task, so both observe the same
  // cache instance even if the _cache field is reassigned by a later invocation.
  final ClusterDataCache cache = eventCache;
  _cache = cache;

  HelixDataAccessor dataAccessor = manager.getHelixDataAccessor();
  cache.refresh(dataAccessor);
  final ClusterConfig clusterConfig = cache.getClusterConfig();

  if (!cache.isTaskCache()) {
    final ClusterStatusMonitor clusterStatusMonitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
    asyncExecute(cache.getAsyncTasksThreadPool(), new Callable<Object>() {
      @Override
      public Object call() {
        // Update the cluster status gauges
        if (clusterStatusMonitor != null) {
          logger.debug("Update cluster status monitors");
          Set<String> instanceSet = Sets.newHashSet();
          Set<String> liveInstanceSet = Sets.newHashSet();
          Set<String> disabledInstanceSet = Sets.newHashSet();
          Map<String, Map<String, List<String>>> disabledPartitions = Maps.newHashMap();
          Map<String, List<String>> oldDisabledPartitions = Maps.newHashMap();
          Map<String, Set<String>> tags = Maps.newHashMap();
          Map<String, LiveInstance> liveInstanceMap = cache.getLiveInstances();
          for (Map.Entry<String, InstanceConfig> e : cache.getInstanceConfigMap().entrySet()) {
            String instanceName = e.getKey();
            InstanceConfig config = e.getValue();
            instanceSet.add(instanceName);
            if (liveInstanceMap.containsKey(instanceName)) {
              liveInstanceSet.add(instanceName);
            }
            // An instance counts as disabled when either its own config or the cluster config
            // marks it so.
            if (!config.getInstanceEnabled() || (clusterConfig.getDisabledInstances() != null && clusterConfig.getDisabledInstances().containsKey(instanceName))) {
              disabledInstanceSet.add(instanceName);
            }
            // TODO : Get rid of this data structure once the API is removed.
            oldDisabledPartitions.put(instanceName, config.getDisabledPartitions());
            disabledPartitions.put(instanceName, config.getDisabledPartitionsMap());
            Set<String> instanceTags = Sets.newHashSet(config.getTags());
            tags.put(instanceName, instanceTags);
          }
          clusterStatusMonitor.setClusterInstanceStatus(liveInstanceSet, instanceSet, disabledInstanceSet, disabledPartitions, oldDisabledPartitions, tags);
          logger.debug("Complete cluster status monitors update.");
        }
        return null;
      }
    });
  }
  event.addAttribute(AttributeName.ClusterDataCache.name(), cache);
}
use of org.apache.helix.monitoring.mbeans.ClusterStatusMonitor in project helix by apache.
the class ExternalViewComputeStage method process.
/**
 * Builds an ExternalView for every resource in the event from the current state output, reports
 * resource status to the cluster status monitor (non-task caches only), writes new or changed
 * views through the data accessor, and removes views that are disabled or no longer backed by a
 * resource.
 *
 * @param event the cluster event; must carry the HelixManager, RESOURCES and ClusterDataCache
 *              attributes
 * @throws StageException if any of those three attributes is missing
 */
@Override
public void process(ClusterEvent event) throws Exception {
  HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
  Map<String, Resource> resourceMap = event.getAttribute(AttributeName.RESOURCES.name());
  ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());
  if (manager == null || resourceMap == null || cache == null) {
    throw new StageException("Missing attributes in event:" + event + ". Requires ClusterManager|RESOURCES|DataCache");
  }

  HelixDataAccessor dataAccessor = manager.getHelixDataAccessor();
  PropertyKey.Builder keyBuilder = dataAccessor.keyBuilder();
  CurrentStateOutput currentStateOutput = event.getAttribute(AttributeName.CURRENT_STATE.name());
  Map<String, ExternalView> existingViews = cache.getExternalViews();
  List<ExternalView> changedViews = new ArrayList<>();

  for (Map.Entry<String, Resource> resourceEntry : resourceMap.entrySet()) {
    String resourceName = resourceEntry.getKey();
    Resource resource = resourceEntry.getValue();
    ExternalView view = new ExternalView(resourceName);

    // Use the resource's bucket size when it has one; otherwise fall back to the bucket size
    // recorded in the current state.
    if (resource.getBucketSize() <= 0) {
      view.setBucketSize(currentStateOutput.getBucketSize(resourceName));
    } else {
      view.setBucketSize(resource.getBucketSize());
    }

    // Copy every instance's current state for each partition into the view.
    for (Partition partition : resource.getPartitions()) {
      Map<String, String> stateByInstance = currentStateOutput.getCurrentStateMap(resourceName, partition);
      if (stateByInstance == null || stateByInstance.isEmpty()) {
        continue;
      }
      for (Map.Entry<String, String> stateEntry : stateByInstance.entrySet()) {
        view.setState(partition.getPartitionName(), stateEntry.getKey(), stateEntry.getValue());
      }
    }

    // Update cluster status monitor mbean
    IdealState idealState = cache.getIdealState(resourceName);
    if (!cache.isTaskCache()) {
      ResourceConfig resourceConfig = cache.getResourceConfig(resourceName);
      ClusterStatusMonitor clusterStatusMonitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
      if (clusterStatusMonitor != null) {
        if (idealState == null || (resourceConfig != null && resourceConfig.isMonitoringDisabled())) {
          // Drop the metrics if the resource is dropped, or the MonitorDisabled is changed to true.
          clusterStatusMonitor.unregisterResource(view.getResourceName());
        } else if (!idealState.getStateModelDefRef().equalsIgnoreCase(DefaultSchedulerMessageHandlerFactory.SCHEDULER_TASK_QUEUE)) {
          StateModelDefinition stateModelDef = cache.getStateModelDef(idealState.getStateModelDefRef());
          clusterStatusMonitor.setResourceStatus(view, cache.getIdealState(view.getResourceName()), stateModelDef);
        }
      }
    }

    // Simple fields come from the ideal state; if it was deleted, carry them over from the
    // existing external view.
    ExternalView existingView = existingViews.get(resourceName);
    if (idealState != null) {
      view.getRecord().getSimpleFields().putAll(idealState.getRecord().getSimpleFields());
    } else if (existingView != null) {
      view.getRecord().getSimpleFields().putAll(existingView.getRecord().getSimpleFields());
    }

    // Nothing to write when the freshly computed view equals the existing one.
    boolean unchanged = existingView != null && existingView.getRecord().equals(view.getRecord());
    if (unchanged) {
      continue;
    }
    changedViews.add(view);
    // For scheduler task queues, propagate the task status as well.
    if (idealState != null && idealState.getStateModelDefRef().equalsIgnoreCase(DefaultSchedulerMessageHandlerFactory.SCHEDULER_TASK_QUEUE)) {
      updateScheduledTaskStatus(view, manager, idealState);
    }
  }

  // TODO: consider not setting the externalview of SCHEDULER_TASK_QUEUE at all.
  // Are there any entity that will be interested in its change?
  // Resources whose IdealState disables the external view are never written; an already
  // existing view for such a resource is removed instead.
  List<String> removedViewNames = new ArrayList<>();
  List<PropertyKey> keysToWrite = new ArrayList<>();
  Iterator<ExternalView> viewItr = changedViews.iterator();
  while (viewItr.hasNext()) {
    String resourceName = viewItr.next().getResourceName();
    IdealState idealState = cache.getIdealState(resourceName);
    boolean viewDisabled = idealState != null && idealState.isExternalViewDisabled();
    if (!viewDisabled) {
      keysToWrite.add(keyBuilder.externalView(resourceName));
      continue;
    }
    viewItr.remove();
    if (existingViews.containsKey(resourceName)) {
      LOG.info("Remove externalView for resource: " + resourceName);
      dataAccessor.removeProperty(keyBuilder.externalView(resourceName));
      removedViewNames.add(resourceName);
    }
  }

  // add/update external-views
  if (!changedViews.isEmpty()) {
    dataAccessor.setChildren(keysToWrite, changedViews);
    cache.updateExternalViews(changedViews);
  }

  // remove dead external-views: those whose resource is no longer in the resource map
  for (String resourceName : existingViews.keySet()) {
    if (resourceMap.containsKey(resourceName)) {
      continue;
    }
    LOG.info("Remove externalView for resource: " + resourceName);
    dataAccessor.removeProperty(keyBuilder.externalView(resourceName));
    removedViewNames.add(resourceName);
  }
  cache.removeExternalViews(removedViewNames);
}
use of org.apache.helix.monitoring.mbeans.ClusterStatusMonitor in project helix by apache.
the class Pipeline method handle.
/**
 * Runs every configured stage against the event in order (preProcess, process, postProcess),
 * logs how long each stage took and, when the event carries a ClusterStatusMonitor, reports the
 * duration to it. Does nothing when no stages are configured.
 *
 * @param event the cluster event handed to each stage
 * @throws Exception whatever a stage throws; remaining stages are not run
 */
public void handle(ClusterEvent event) throws Exception {
  if (_stages != null) {
    for (Stage stage : _stages) {
      long startedAt = System.currentTimeMillis();
      stage.preProcess();
      stage.process(event);
      stage.postProcess();
      long duration = System.currentTimeMillis() - startedAt;

      String summary = String.format("END %s for %s pipeline for cluster %s. took: %d ms ", stage.getStageName(), _pipelineType, event.getClusterName(), duration);
      logger.info(summary);

      // The monitor is looked up after every stage, as in the original flow.
      ClusterStatusMonitor monitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
      if (monitor == null) {
        continue;
      }
      monitor.updateClusterEventDuration(stage.getStageName(), duration);
    }
  }
}
use of org.apache.helix.monitoring.mbeans.ClusterStatusMonitor in project helix by apache.
the class BestPossibleStateCalcStage method process.
/**
 * Computes the best possible state for the resources to rebalance, stores it on the event under
 * BEST_POSSIBLE_STATE and, for non-task caches, asynchronously pushes per-instance resource
 * status to the cluster status monitor.
 *
 * @param event the cluster event; must carry CURRENT_STATE, RESOURCES_TO_REBALANCE and
 *              ClusterDataCache attributes
 * @throws StageException if any of those attributes is missing
 */
@Override
public void process(ClusterEvent event) throws Exception {
  final ClusterStatusMonitor clusterStatusMonitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
  final Map<String, Resource> resourceMap = event.getAttribute(AttributeName.RESOURCES_TO_REBALANCE.name());
  CurrentStateOutput currentStateOutput = event.getAttribute(AttributeName.CURRENT_STATE.name());
  ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());

  boolean missingAttribute = currentStateOutput == null || resourceMap == null || cache == null;
  if (missingAttribute) {
    throw new StageException("Missing attributes in event:" + event + ". Requires CURRENT_STATE|RESOURCES|DataCache");
  }

  // Reset current INIT/RUNNING tasks on participants for throttling
  cache.resetActiveTaskCount(currentStateOutput);

  // Check whether the offline/disabled instance count in the cluster reaches the set limit,
  // if yes, pause the rebalancer.
  HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
  validateOfflineInstancesLimit(cache, manager, clusterStatusMonitor);

  final BestPossibleStateOutput bestPossibleStateOutput = compute(event, resourceMap, currentStateOutput);
  event.addAttribute(AttributeName.BEST_POSSIBLE_STATE.name(), bestPossibleStateOutput);

  // Task caches do not report per-instance resource status.
  if (cache.isTaskCache()) {
    return;
  }

  final Map<String, InstanceConfig> instanceConfigMap = cache.getInstanceConfigMap();
  final Map<String, StateModelDefinition> stateModelDefMap = cache.getStateModelDefMap();
  Callable<Object> monitorUpdate = new Callable<Object>() {
    @Override
    public Object call() {
      if (clusterStatusMonitor == null) {
        return null;
      }
      try {
        clusterStatusMonitor.setPerInstanceResourceStatus(bestPossibleStateOutput, instanceConfigMap, resourceMap, stateModelDefMap);
      } catch (Exception e) {
        // Metric reporting failures are logged and otherwise ignored.
        logger.error("Could not update cluster status metrics!", e);
      }
      return null;
    }
  };
  asyncExecute(cache.getAsyncTasksThreadPool(), monitorUpdate);
}
use of org.apache.helix.monitoring.mbeans.ClusterStatusMonitor in project helix by apache.
the class BestPossibleStateCalcStage method compute.
/**
 * Computes the best possible state of every resource, visiting resources in the order defined by
 * {@code ResourcePriority}'s natural ordering, and reports whether any resource failed.
 *
 * @param event the cluster event supplying the ClusterDataCache, the optional HelixManager and
 *              the optional ClusterStatusMonitor
 * @param resourceMap resources to compute, keyed by name
 * @param currentStateOutput current state passed through to the per-resource computation
 * @return the accumulated best possible state output
 */
private BestPossibleStateOutput compute(ClusterEvent event, Map<String, Resource> resourceMap, CurrentStateOutput currentStateOutput) {
  ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());
  BestPossibleStateOutput output = new BestPossibleStateOutput();

  // A TaskDriver can only be built when the event carries a HelixManager.
  HelixManager helixManager = event.getAttribute(AttributeName.helixmanager.name());
  TaskDriver taskDriver = null;
  if (helixManager != null) {
    taskDriver = new TaskDriver(helixManager);
  }

  PriorityQueue<ResourcePriority> resourcePriorityQueue = new PriorityQueue<>();
  for (Resource resource : resourceMap.values()) {
    resourcePriorityQueue.add(new ResourcePriority(resource, cache.getIdealState(resource.getResourceName()), taskDriver));
  }

  final List<String> failureResources = new ArrayList<>();
  // Drain with poll(): PriorityQueue.iterator() makes no ordering guarantee, so iterating the
  // queue would visit resources in heap-array order rather than priority order.
  ResourcePriority next;
  while ((next = resourcePriorityQueue.poll()) != null) {
    Resource resource = next.getResource();
    if (!computeResourceBestPossibleState(event, cache, currentStateOutput, resource, output)) {
      failureResources.add(resource.getResourceName());
      logger.warn("Failed to calculate best possible states for " + resource.getResourceName());
    }
  }

  // Check and report if resource rebalance has failure
  if (!cache.isTaskCache()) {
    ClusterStatusMonitor clusterStatusMonitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
    updateRebalanceStatus(!failureResources.isEmpty(), helixManager, cache, clusterStatusMonitor, "Failed to calculate best possible states for " + failureResources.size() + " resources.");
  }
  return output;
}
Aggregations