Search in sources :

Example 1 with StageException

use of org.apache.helix.controller.pipeline.StageException in project helix by apache.

the class MessageGenerationPhase method process.

/**
 * Generates the state-transition and cancellation messages for every partition replica of the
 * resources to rebalance, by comparing each replica's current state with its desired state.
 *
 * <p>Reads the helixmanager, ClusterDataCache, RESOURCES_TO_REBALANCE, CURRENT_STATE and
 * INTERMEDIATE_STATE attributes from the event and stores the generated messages back on the
 * event under MESSAGES_ALL. Pending messages flagged by {@code shouldCleanUpPendingMessage}
 * are collected and handed to {@code schedulePendingMessageCleanUp}.
 *
 * @param event the cluster event carrying the pipeline attributes
 * @throws StageException if any of the required attributes is missing from the event
 */
@Override
public void process(ClusterEvent event) throws Exception {
    HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
    ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());
    Map<String, Resource> resourceMap = event.getAttribute(AttributeName.RESOURCES_TO_REBALANCE.name());
    Map<String, List<Message>> pendingMessagesToCleanUp = new HashMap<>();
    CurrentStateOutput currentStateOutput = event.getAttribute(AttributeName.CURRENT_STATE.name());
    IntermediateStateOutput intermediateStateOutput = event.getAttribute(AttributeName.INTERMEDIATE_STATE.name());
    if (manager == null || cache == null || resourceMap == null || currentStateOutput == null || intermediateStateOutput == null) {
        throw new StageException("Missing attributes in event:" + event + ". Requires HelixManager|DataCache|RESOURCES|CURRENT_STATE|INTERMEDIATE_STATE");
    }
    // Remember the session id of every live instance; each generated message is addressed to it.
    Map<String, LiveInstance> liveInstances = cache.getLiveInstances();
    Map<String, String> sessionIdMap = new HashMap<String, String>();
    for (LiveInstance liveInstance : liveInstances.values()) {
        sessionIdMap.put(liveInstance.getInstanceName(), liveInstance.getSessionId());
    }
    MessageGenerationOutput output = new MessageGenerationOutput();
    for (Map.Entry<String, Resource> resourceEntry : resourceMap.entrySet()) {
        String resourceName = resourceEntry.getKey();
        Resource resource = resourceEntry.getValue();
        StateModelDefinition stateModelDef = cache.getStateModelDef(resource.getStateModelDefRef());
        if (stateModelDef == null) {
            logger.error("State Model Definition null, skip generating messages for resource: " + resourceName);
            continue;
        }
        for (Partition partition : resource.getPartitions()) {
            // Work on a copy so that instances which only have a pending message can be added
            // with the NO_DESIRED_STATE marker without touching the intermediate state output.
            Map<String, String> instanceStateMap = new HashMap<String, String>(intermediateStateOutput.getInstanceStateMap(resourceName, partition));
            Map<String, String> pendingStateMap = currentStateOutput.getPendingStateMap(resourceName, partition);
            for (String instance : pendingStateMap.keySet()) {
                if (!instanceStateMap.containsKey(instance)) {
                    instanceStateMap.put(instance, NO_DESIRED_STATE);
                }
            }
            // we should generate message based on the desired-state priority
            // so keep generated messages in a temp map keyed by state
            // desired-state->list of generated-messages
            Map<String, List<Message>> messageMap = new HashMap<String, List<Message>>();
            for (Map.Entry<String, String> stateEntry : instanceStateMap.entrySet()) {
                String instanceName = stateEntry.getKey();
                String desiredState = stateEntry.getValue();
                String currentState = currentStateOutput.getCurrentState(resourceName, partition, instanceName);
                if (currentState == null) {
                    currentState = stateModelDef.getInitialState();
                }
                Message pendingMessage = currentStateOutput.getPendingState(resourceName, partition, instanceName);
                boolean isCancellationEnabled = cache.getClusterConfig().isStateTransitionCancelEnabled();
                Message cancellationMessage = currentStateOutput.getCancellationState(resourceName, partition, instanceName);
                String nextState = stateModelDef.getNextStateForTransition(currentState, desiredState);
                Message message = null;
                if (shouldCleanUpPendingMessage(pendingMessage, currentState, currentStateOutput.getEndTime(resourceName, partition, instanceName))) {
                    logger.info("Adding pending message {} on instance {} to clean up. Msg: {}->{}, current state of resource {}:{} is {}", pendingMessage.getMsgId(), instanceName, pendingMessage.getFromState(), pendingMessage.getToState(), resourceName, partition, currentState);
                    if (!pendingMessagesToCleanUp.containsKey(instanceName)) {
                        pendingMessagesToCleanUp.put(instanceName, new ArrayList<Message>());
                    }
                    pendingMessagesToCleanUp.get(instanceName).add(pendingMessage);
                }
                if (desiredState.equals(NO_DESIRED_STATE) || desiredState.equalsIgnoreCase(currentState)) {
                    // No transition is wanted for this replica. A cancellation is built from the
                    // pending message, so one must exist; without the explicit null check the
                    // NO_DESIRED_STATE case would dereference a null pending message.
                    if (pendingMessage != null && (desiredState.equals(NO_DESIRED_STATE) || !currentState.equalsIgnoreCase(pendingMessage.getToState()))) {
                        message = createStateTransitionCancellationMessage(manager, resource, partition.getPartitionName(), instanceName, sessionIdMap.get(instanceName), stateModelDef.getId(), pendingMessage.getFromState(), pendingMessage.getToState(), null, cancellationMessage, isCancellationEnabled, currentState);
                    }
                } else {
                    if (nextState == null) {
                        logger.error("Unable to find a next state for resource: " + resource.getResourceName() + " partition: " + partition.getPartitionName() + " from stateModelDefinition" + stateModelDef.getClass() + " from:" + currentState + " to:" + desiredState);
                        continue;
                    }
                    if (pendingMessage != null) {
                        String pendingState = pendingMessage.getToState();
                        if (nextState.equalsIgnoreCase(pendingState)) {
                            // The in-flight message already performs the wanted transition.
                            logger.debug("Message already exists for " + instanceName + " to transit " + resource.getResourceName() + "." + partition.getPartitionName() + " from " + currentState + " to " + nextState);
                        } else if (currentState.equalsIgnoreCase(pendingState)) {
                            // The pending message targets the state the replica is already in.
                            logger.info("Message hasn't been removed for " + instanceName + " to transit " + resource.getResourceName() + "." + partition.getPartitionName() + " to " + pendingState + ", desiredState: " + desiredState);
                        } else {
                            // The in-flight transition no longer matches the wanted one.
                            logger.info("IdealState changed before state transition completes for " + resource.getResourceName() + "." + partition.getPartitionName() + " on " + instanceName + ", pendingState: " + pendingState + ", currentState: " + currentState + ", nextState: " + nextState);
                            message = createStateTransitionCancellationMessage(manager, resource, partition.getPartitionName(), instanceName, sessionIdMap.get(instanceName), stateModelDef.getId(), pendingMessage.getFromState(), pendingState, nextState, cancellationMessage, isCancellationEnabled, currentState);
                        }
                    } else {
                        // Create new state transition message
                        message = createStateTransitionMessage(manager, resource, partition.getPartitionName(), instanceName, currentState, nextState, sessionIdMap.get(instanceName), stateModelDef.getId());
                    }
                }
                if (message != null) {
                    IdealState idealState = cache.getIdealState(resourceName);
                    // For scheduler task queue resources, attach the partition's map field from
                    // the ideal state to the message as its inner message.
                    if (idealState != null && idealState.getStateModelDefRef().equalsIgnoreCase(DefaultSchedulerMessageHandlerFactory.SCHEDULER_TASK_QUEUE)) {
                        if (idealState.getRecord().getMapField(partition.getPartitionName()) != null) {
                            message.getRecord().setMapField(Message.Attributes.INNER_MESSAGE.toString(), idealState.getRecord().getMapField(partition.getPartitionName()));
                        }
                    }
                    int timeout = getTimeOut(cache.getClusterConfig(), cache.getResourceConfig(resourceName), currentState, nextState, idealState, partition);
                    if (timeout > 0) {
                        message.setExecutionTimeout(timeout);
                    }
                    message.setAttribute(Message.Attributes.ClusterEventName, event.getEventType().name());
                    // Do not add to the output yet: bucket by desired state so the messages can
                    // be emitted in state-priority order below.
                    if (!messageMap.containsKey(desiredState)) {
                        messageMap.put(desiredState, new ArrayList<Message>());
                    }
                    messageMap.get(desiredState).add(message);
                }
            }
            // add generated messages to output according to state priority
            // NOTE(review): a message bucketed under a state that is absent from the priority
            // list (possibly NO_DESIRED_STATE) is never added to the output - confirm intended.
            List<String> statesPriorityList = stateModelDef.getStatesPriorityList();
            for (String state : statesPriorityList) {
                if (messageMap.containsKey(state)) {
                    for (Message message : messageMap.get(state)) {
                        output.addMessage(resourceName, partition, message);
                    }
                }
            }
        }
        // end of for-each-partition
    }
    // Asynchronously clean up pending messages if necessary
    if (!pendingMessagesToCleanUp.isEmpty()) {
        schedulePendingMessageCleanUp(pendingMessagesToCleanUp, cache.getAsyncTasksThreadPool(), manager.getHelixDataAccessor());
    }
    event.addAttribute(AttributeName.MESSAGES_ALL.name(), output);
}
Also used : Partition(org.apache.helix.model.Partition) HelixManager(org.apache.helix.HelixManager) Message(org.apache.helix.model.Message) HashMap(java.util.HashMap) StageException(org.apache.helix.controller.pipeline.StageException) Resource(org.apache.helix.model.Resource) IdealState(org.apache.helix.model.IdealState) LiveInstance(org.apache.helix.model.LiveInstance) StateModelDefinition(org.apache.helix.model.StateModelDefinition) ArrayList(java.util.ArrayList) List(java.util.List)

Example 2 with StageException

use of org.apache.helix.controller.pipeline.StageException in project helix by apache.

the class MessageSelectionStage method process.

/**
 * Chooses, for every partition of every resource, which of the previously generated messages
 * should be sent, and stores the selection on the event under MESSAGES_SELECTED.
 *
 * <p>Selection itself is delegated to {@code selectMessages}, which is given the live
 * instances, the partition's current states and pending messages, the per-resource state
 * constraints and the state-transition priorities.
 *
 * @param event the cluster event carrying the pipeline attributes
 * @throws StageException if the data cache, RESOURCES, CURRENT_STATE or MESSAGES_ALL attribute
 *     is missing from the event
 */
@Override
public void process(ClusterEvent event) throws Exception {
    ClusterDataCache dataCache = event.getAttribute(AttributeName.ClusterDataCache.name());
    Map<String, Resource> resources = event.getAttribute(AttributeName.RESOURCES.name());
    CurrentStateOutput currentStates = event.getAttribute(AttributeName.CURRENT_STATE.name());
    MessageGenerationOutput generated = event.getAttribute(AttributeName.MESSAGES_ALL.name());

    boolean attributeMissing = dataCache == null
        || resources == null
        || currentStates == null
        || generated == null;
    if (attributeMissing) {
        throw new StageException("Missing attributes in event:" + event + ". Requires DataCache|RESOURCES|CURRENT_STATE|MESSAGES_ALL");
    }

    MessageSelectionStageOutput selection = new MessageSelectionStageOutput();
    for (Map.Entry<String, Resource> entry : resources.entrySet()) {
        String name = entry.getKey();
        Resource res = entry.getValue();

        // Per-resource inputs shared by all of its partitions.
        StateModelDefinition modelDef = dataCache.getStateModelDef(res.getStateModelDefRef());
        Map<String, Integer> transitionPriorities = getStateTransitionPriorityMap(modelDef);
        IdealState ideal = dataCache.getIdealState(name);
        Map<String, Bounds> bounds = computeStateConstraints(modelDef, ideal, dataCache);

        for (Partition part : res.getPartitions()) {
            List<Message> candidates = generated.getMessages(name, part);
            List<Message> chosen = selectMessages(
                dataCache.getLiveInstances(),
                currentStates.getCurrentStateMap(name, part),
                currentStates.getPendingMessageMap(name, part),
                candidates,
                bounds,
                transitionPriorities,
                modelDef,
                res.isP2PMessageEnabled());
            selection.addMessages(name, part, chosen);
        }
    }
    event.addAttribute(AttributeName.MESSAGES_SELECTED.name(), selection);
}
Also used : Partition(org.apache.helix.model.Partition) Message(org.apache.helix.model.Message) StageException(org.apache.helix.controller.pipeline.StageException) Resource(org.apache.helix.model.Resource) IdealState(org.apache.helix.model.IdealState) StateModelDefinition(org.apache.helix.model.StateModelDefinition)

Example 3 with StageException

use of org.apache.helix.controller.pipeline.StageException in project helix by apache.

the class MessageThrottleStage method process.

/**
 * Applies the cluster's message constraint to the selected messages and stores the result on
 * the event under MESSAGES_THROTTLE.
 *
 * <p>When a MESSAGE_CONSTRAINT is configured, the messages already held in the cache for each
 * live instance are first passed through {@code throttle} in counting-only mode, then each
 * partition's selected messages are passed through {@code throttle} with throttling enabled.
 * Without a constraint the selected messages are forwarded unchanged.
 *
 * @param event the cluster event carrying the pipeline attributes
 * @throws StageException if the data cache, RESOURCES or MESSAGES_SELECTED attribute is
 *     missing from the event
 */
@Override
public void process(ClusterEvent event) throws Exception {
    ClusterDataCache dataCache = event.getAttribute(AttributeName.ClusterDataCache.name());
    MessageSelectionStageOutput selected = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
    Map<String, Resource> resources = event.getAttribute(AttributeName.RESOURCES.name());

    boolean attributeMissing = dataCache == null || resources == null || selected == null;
    if (attributeMissing) {
        throw new StageException("Missing attributes in event: " + event + ". Requires ClusterDataCache|RESOURCES|MESSAGES_SELECTED");
    }

    MessageThrottleStageOutput throttled = new MessageThrottleStageOutput();
    ClusterConstraints constraint = dataCache.getConstraint(ConstraintType.MESSAGE_CONSTRAINT);
    boolean constrained = constraint != null;
    Map<String, Integer> counters = new HashMap<String, Integer>();

    if (constrained) {
        // Messages already pending on the instances count against the limits
        // but are never dropped themselves.
        for (String liveInstance : dataCache.getLiveInstances().keySet()) {
            List<Message> pending = new ArrayList<Message>(dataCache.getMessages(liveInstance).values());
            throttle(counters, constraint, pending, false);
        }
    }

    // The selection stage is expected to have ordered messages by state transition priority.
    for (Map.Entry<String, Resource> entry : resources.entrySet()) {
        String name = entry.getKey();
        for (Partition part : entry.getValue().getPartitions()) {
            List<Message> batch = selected.getMessages(name, part);
            if (constrained && batch != null && !batch.isEmpty()) {
                batch = throttle(counters, constraint, batch, true);
            }
            throttled.addMessages(name, part, batch);
        }
    }
    event.addAttribute(AttributeName.MESSAGES_THROTTLE.name(), throttled);
}
Also used : Partition(org.apache.helix.model.Partition) Message(org.apache.helix.model.Message) HashMap(java.util.HashMap) StageException(org.apache.helix.controller.pipeline.StageException) Resource(org.apache.helix.model.Resource) ClusterConstraints(org.apache.helix.model.ClusterConstraints)

Example 4 with StageException

use of org.apache.helix.controller.pipeline.StageException in project helix by apache.

the class ReadClusterDataStage method process.

/**
 * Refreshes the cluster data cache from the data accessor and publishes it on the event under
 * the ClusterDataCache attribute.
 *
 * <p>The cache supplied by the event is used when present. Otherwise the cache kept by this
 * stage is reused, and a new one is created for the event's cluster only when neither exists.
 * For non-task caches, an asynchronous task is also submitted that pushes the instance, live
 * instance, disabled instance, disabled partition and tag information to the cluster status
 * monitor, if one is attached to the event.
 *
 * @param event the cluster event carrying the pipeline attributes
 * @throws StageException if the HelixManager attribute is missing from the event
 */
@Override
public void process(ClusterEvent event) throws Exception {
    HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
    if (manager == null) {
        throw new StageException("HelixManager attribute value is null");
    }
    ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());
    if (cache != null) {
        _cache = cache;
    } else if (_cache == null) {
        _cache = new ClusterDataCache(event.getClusterName());
    }
    // When the event carries no cache but this stage already holds one, it is reused;
    // unconditionally assigning the (null) event cache here would discard it and fail below.
    HelixDataAccessor dataAccessor = manager.getHelixDataAccessor();
    _cache.refresh(dataAccessor);
    final ClusterConfig clusterConfig = _cache.getClusterConfig();
    if (!_cache.isTaskCache()) {
        final ClusterStatusMonitor clusterStatusMonitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
        asyncExecute(_cache.getAsyncTasksThreadPool(), new Callable<Object>() {

            @Override
            public Object call() {
                // Update the cluster status gauges
                if (clusterStatusMonitor != null) {
                    logger.debug("Update cluster status monitors");
                    Set<String> instanceSet = Sets.newHashSet();
                    Set<String> liveInstanceSet = Sets.newHashSet();
                    Set<String> disabledInstanceSet = Sets.newHashSet();
                    Map<String, Map<String, List<String>>> disabledPartitions = Maps.newHashMap();
                    Map<String, List<String>> oldDisabledPartitions = Maps.newHashMap();
                    Map<String, Set<String>> tags = Maps.newHashMap();
                    Map<String, LiveInstance> liveInstanceMap = _cache.getLiveInstances();
                    for (Map.Entry<String, InstanceConfig> e : _cache.getInstanceConfigMap().entrySet()) {
                        String instanceName = e.getKey();
                        InstanceConfig config = e.getValue();
                        instanceSet.add(instanceName);
                        if (liveInstanceMap.containsKey(instanceName)) {
                            liveInstanceSet.add(instanceName);
                        }
                        // An instance counts as disabled when either its own config or the
                        // cluster-level disabled-instances map says so.
                        if (!config.getInstanceEnabled() || (clusterConfig.getDisabledInstances() != null && clusterConfig.getDisabledInstances().containsKey(instanceName))) {
                            disabledInstanceSet.add(instanceName);
                        }
                        // TODO : Get rid of this data structure once the API is removed.
                        oldDisabledPartitions.put(instanceName, config.getDisabledPartitions());
                        disabledPartitions.put(instanceName, config.getDisabledPartitionsMap());
                        Set<String> instanceTags = Sets.newHashSet(config.getTags());
                        tags.put(instanceName, instanceTags);
                    }
                    clusterStatusMonitor.setClusterInstanceStatus(liveInstanceSet, instanceSet, disabledInstanceSet, disabledPartitions, oldDisabledPartitions, tags);
                    logger.debug("Complete cluster status monitors update.");
                }
                return null;
            }
        });
    }
    event.addAttribute(AttributeName.ClusterDataCache.name(), _cache);
}
Also used : HelixManager(org.apache.helix.HelixManager) Set(java.util.Set) StageException(org.apache.helix.controller.pipeline.StageException) ClusterStatusMonitor(org.apache.helix.monitoring.mbeans.ClusterStatusMonitor) HelixDataAccessor(org.apache.helix.HelixDataAccessor) InstanceConfig(org.apache.helix.model.InstanceConfig) List(java.util.List) Map(java.util.Map) ClusterConfig(org.apache.helix.model.ClusterConfig)

Example 5 with StageException

use of org.apache.helix.controller.pipeline.StageException in project helix by apache.

the class ExternalViewComputeStage method process.

/**
 * Computes the external view of every resource from the current state output and synchronises
 * the result with the data accessor and the data cache.
 *
 * <p>For each resource a new ExternalView is built from the per-partition current state map.
 * Views that differ from the cached one are written with {@code dataAccessor.setChildren},
 * unless the resource's ideal state has external views disabled, in which case an existing
 * view is removed instead. External views of resources that are no longer in the resource map
 * are removed as well. For non-task caches the cluster status monitor, when present, is updated
 * or the resource is unregistered from it.
 *
 * @param event the cluster event carrying the pipeline attributes
 * @throws StageException if the HelixManager, RESOURCES or data cache attribute is missing
 */
@Override
public void process(ClusterEvent event) throws Exception {
    HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
    Map<String, Resource> resourceMap = event.getAttribute(AttributeName.RESOURCES.name());
    ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());
    if (manager == null || resourceMap == null || cache == null) {
        throw new StageException("Missing attributes in event:" + event + ". Requires ClusterManager|RESOURCES|DataCache");
    }
    HelixDataAccessor dataAccessor = manager.getHelixDataAccessor();
    PropertyKey.Builder keyBuilder = dataAccessor.keyBuilder();
    // NOTE(review): CURRENT_STATE is not validated above; if it is missing, the first use inside
    // the resource loop below fails with a NullPointerException rather than a StageException.
    CurrentStateOutput currentStateOutput = event.getAttribute(AttributeName.CURRENT_STATE.name());
    List<ExternalView> newExtViews = new ArrayList<>();
    Map<String, ExternalView> curExtViews = cache.getExternalViews();
    for (String resourceName : resourceMap.keySet()) {
        ExternalView view = new ExternalView(resourceName);
        // if resource ideal state has bucket size, set it
        // otherwise resource has been dropped, use bucket size from current state instead
        Resource resource = resourceMap.get(resourceName);
        if (resource.getBucketSize() > 0) {
            view.setBucketSize(resource.getBucketSize());
        } else {
            view.setBucketSize(currentStateOutput.getBucketSize(resourceName));
        }
        for (Partition partition : resource.getPartitions()) {
            Map<String, String> currentStateMap = currentStateOutput.getCurrentStateMap(resourceName, partition);
            if (currentStateMap != null && currentStateMap.size() > 0) {
                // Every instance in the current state map is published in the view.
                // NOTE(review): a filter on disabled instances used to be sketched here in
                // commented-out code; it is not applied.
                for (String instance : currentStateMap.keySet()) {
                    view.setState(partition.getPartitionName(), instance, currentStateMap.get(instance));
                }
            }
        }
        // Update cluster status monitor mbean
        IdealState idealState = cache.getIdealState(resourceName);
        if (!cache.isTaskCache()) {
            ResourceConfig resourceConfig = cache.getResourceConfig(resourceName);
            ClusterStatusMonitor clusterStatusMonitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
            if (clusterStatusMonitor != null) {
                if (idealState != null && (resourceConfig == null || !resourceConfig.isMonitoringDisabled())) {
                    // Scheduler task queue resources are not reported to the monitor.
                    if (!idealState.getStateModelDefRef().equalsIgnoreCase(DefaultSchedulerMessageHandlerFactory.SCHEDULER_TASK_QUEUE)) {
                        StateModelDefinition stateModelDef = cache.getStateModelDef(idealState.getStateModelDefRef());
                        clusterStatusMonitor.setResourceStatus(view, cache.getIdealState(view.getResourceName()), stateModelDef);
                    }
                } else {
                    // Drop the metrics if the resource is dropped, or the MonitorDisabled is changed to true.
                    clusterStatusMonitor.unregisterResource(view.getResourceName());
                }
            }
        }
        ExternalView curExtView = curExtViews.get(resourceName);
        // copy simplefields from IS, in cases where IS is deleted copy it from existing ExternalView
        if (idealState != null) {
            view.getRecord().getSimpleFields().putAll(idealState.getRecord().getSimpleFields());
        } else if (curExtView != null) {
            view.getRecord().getSimpleFields().putAll(curExtView.getRecord().getSimpleFields());
        }
        // compare the new external view with current one, set only on different
        if (curExtView == null || !curExtView.getRecord().equals(view.getRecord())) {
            // Add external view to the list which will be written to ZK later.
            newExtViews.add(view);
            // For scheduler task queue resources, additionally pass the changed view to
            // updateScheduledTaskStatus (presumably it updates the status of the original
            // scheduler message and removes finished partitions from the ideal state -
            // confirm against that helper).
            if (idealState != null && idealState.getStateModelDefRef().equalsIgnoreCase(DefaultSchedulerMessageHandlerFactory.SCHEDULER_TASK_QUEUE)) {
                updateScheduledTaskStatus(view, manager, idealState);
            }
        }
    }
    List<String> externalviewsToRemove = new ArrayList<>();
    // TODO: consider not setting the externalview of SCHEDULER_TASK_QUEUE at all.
    // Are there any entity that will be interested in its change?
    // For the resource with DisableExternalView option turned on in IdealState
    // We will not actually create or write the externalView to ZooKeeper.
    // keys is built in the same order as the views that remain in newExtViews, so the two lists
    // line up index by index when passed to setChildren below.
    List<PropertyKey> keys = new ArrayList<>();
    for (Iterator<ExternalView> it = newExtViews.iterator(); it.hasNext(); ) {
        ExternalView view = it.next();
        String resourceName = view.getResourceName();
        IdealState idealState = cache.getIdealState(resourceName);
        if (idealState != null && idealState.isExternalViewDisabled()) {
            it.remove();
            // remove the external view if the external view exists
            if (curExtViews.containsKey(resourceName)) {
                LOG.info("Remove externalView for resource: " + resourceName);
                dataAccessor.removeProperty(keyBuilder.externalView(resourceName));
                externalviewsToRemove.add(resourceName);
            }
        } else {
            keys.add(keyBuilder.externalView(resourceName));
        }
    }
    // add/update external-views
    if (newExtViews.size() > 0) {
        dataAccessor.setChildren(keys, newExtViews);
        cache.updateExternalViews(newExtViews);
    }
    // remove dead external-views
    for (String resourceName : curExtViews.keySet()) {
        if (!resourceMap.keySet().contains(resourceName)) {
            LOG.info("Remove externalView for resource: " + resourceName);
            dataAccessor.removeProperty(keyBuilder.externalView(resourceName));
            externalviewsToRemove.add(resourceName);
        }
    }
    // Drop every removed view (disabled or dead) from the cache in one call.
    cache.removeExternalViews(externalviewsToRemove);
}
Also used : StageException(org.apache.helix.controller.pipeline.StageException) Builder(org.apache.helix.PropertyKey.Builder) ClusterStatusMonitor(org.apache.helix.monitoring.mbeans.ClusterStatusMonitor)

Aggregations

StageException (org.apache.helix.controller.pipeline.StageException)11 Resource (org.apache.helix.model.Resource)7 LiveInstance (org.apache.helix.model.LiveInstance)5 Message (org.apache.helix.model.Message)5 ClusterStatusMonitor (org.apache.helix.monitoring.mbeans.ClusterStatusMonitor)5 HelixManager (org.apache.helix.HelixManager)4 IdealState (org.apache.helix.model.IdealState)4 Partition (org.apache.helix.model.Partition)4 StateModelDefinition (org.apache.helix.model.StateModelDefinition)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 HelixDataAccessor (org.apache.helix.HelixDataAccessor)2 ClusterConfig (org.apache.helix.model.ClusterConfig)2 CurrentState (org.apache.helix.model.CurrentState)2 LinkedHashMap (java.util.LinkedHashMap)1 Set (java.util.Set)1 HelixManagerProperties (org.apache.helix.HelixManagerProperties)1 Builder (org.apache.helix.PropertyKey.Builder)1