Search in sources :

Example 86 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class AgentStateModel method genericStateTransitionHandler.

/**
 * Handles every state transition (any from-state to any to-state) by running the external
 * command configured for that transition.
 *
 * <p>The command, working directory, timeout and pid file are read from the message record
 * first. If the message carries no command, all four values are taken from the resource-scope
 * config, and if that has no command either, from the cluster-scope config.
 *
 * @param message the transition message; supplies the from/to states, the resource name and
 *                optionally the command attributes
 * @param context the notification context; supplies the {@link HelixManager}
 * @throws Exception if no command can be found for the transition, if a non-NOP command has no
 *                   working directory, or if anything called from here fails
 */
@Transition(to = "*", from = "*")
public void genericStateTransitionHandler(Message message, NotificationContext context) throws Exception {
    // first try get command from message
    String cmd = message.getRecord().getSimpleField(CommandAttribute.COMMAND.getName());
    String workingDir = message.getRecord().getSimpleField(CommandAttribute.WORKING_DIR.getName());
    String timeout = message.getRecord().getSimpleField(CommandAttribute.TIMEOUT.getName());
    String pidFile = message.getRecord().getSimpleField(CommandAttribute.PID_FILE.getName());
    HelixManager manager = context.getManager();
    String clusterName = manager.getClusterName();
    String fromState = message.getFromState();
    String toState = message.getToState();
    // construct keys for command-config
    String cmdKey = buildKey(fromState, toState, CommandAttribute.COMMAND);
    String workingDirKey = buildKey(fromState, toState, CommandAttribute.WORKING_DIR);
    String timeoutKey = buildKey(fromState, toState, CommandAttribute.TIMEOUT);
    String pidFileKey = buildKey(fromState, toState, CommandAttribute.PID_FILE);
    List<String> cmdConfigKeys = Arrays.asList(cmdKey, workingDirKey, timeoutKey, pidFileKey);
    // read command from resource-scope configures
    if (cmd == null) {
        HelixConfigScope resourceScope = new HelixConfigScopeBuilder(ConfigScopeProperty.RESOURCE).forCluster(clusterName).forResource(message.getResourceName()).build();
        Map<String, String> cmdKeyValueMap = manager.getConfigAccessor().get(resourceScope, cmdConfigKeys);
        if (cmdKeyValueMap != null) {
            cmd = cmdKeyValueMap.get(cmdKey);
            workingDir = cmdKeyValueMap.get(workingDirKey);
            timeout = cmdKeyValueMap.get(timeoutKey);
            pidFile = cmdKeyValueMap.get(pidFileKey);
        }
    }
    // if resource-scope doesn't contain command, fall back to cluster-scope configures
    if (cmd == null) {
        HelixConfigScope clusterScope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER).forCluster(clusterName).build();
        Map<String, String> cmdKeyValueMap = manager.getConfigAccessor().get(clusterScope, cmdConfigKeys);
        if (cmdKeyValueMap != null) {
            cmd = cmdKeyValueMap.get(cmdKey);
            workingDir = cmdKeyValueMap.get(workingDirKey);
            timeout = cmdKeyValueMap.get(timeoutKey);
            pidFile = cmdKeyValueMap.get(pidFileKey);
        }
    }
    if (cmd == null) {
        throw new Exception("Unable to find command for transition from:" + message.getFromState() + " to:" + message.getToState());
    }
    _logger.info("Executing command: " + cmd + ", using workingDir: " + workingDir + ", timeout: " + timeout + ", on " + manager.getInstanceName());
    // skip nop command
    if (cmd.equals(CommandAttribute.NOP.getName())) {
        return;
    }
    // new File((String) null) throws a bare NullPointerException; fail with a message that
    // names the missing setting instead. Checked only after the NOP short-circuit so that NOP
    // transitions still do not need a working directory.
    if (workingDir == null) {
        throw new Exception("Unable to find working directory for command: " + cmd + ", transition from:" + fromState + " to:" + toState);
    }
    // split the cmd to actual cmd and args[]
    String[] cmdSplits = cmd.trim().split("\\s+");
    String cmdValue = cmdSplits[0];
    String[] args = Arrays.copyOfRange(cmdSplits, 1, cmdSplits.length);
    // get the command-execution timeout
    // 0 means wait for ever
    long timeoutValue = 0;
    if (timeout != null) {
        try {
            timeoutValue = Long.parseLong(timeout);
        } catch (NumberFormatException e) {
            // OK to use 0, but leave a trace so a misconfigured timeout is visible
            _logger.warn("Invalid timeout value: " + timeout + " for command: " + cmd + ", falling back to 0");
        }
    }
    ExternalCommand externalCmd = ExternalCommand.executeWithTimeout(new File(workingDir), cmdValue, timeoutValue, args);
    int exitValue = externalCmd.exitValue();
    if (_logger.isDebugEnabled()) {
        _logger.debug("command: " + cmd + ", exitValue: " + exitValue + " output:\n" + externalCmd.getStringOutput());
    }
    // monitor pid if pidFile exists
    if (pidFile == null) {
        // no pid to monitor
        return;
    }
    String pidFileValue = instantiateByMessage(pidFile, message);
    String pid = SystemUtil.getPidFromFile(new File(pidFileValue));
    if (pid != null) {
        new ProcessMonitorThread(pid).start();
    }
}
Also used : ExternalCommand(org.apache.helix.ExternalCommand) HelixManager(org.apache.helix.HelixManager) HelixConfigScopeBuilder(org.apache.helix.model.builder.HelixConfigScopeBuilder) HelixConfigScope(org.apache.helix.model.HelixConfigScope) File(java.io.File) Transition(org.apache.helix.participant.statemachine.Transition)

Example 87 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class BestPossibleStateCalcStage method computeResourceBestPossibleState.

/**
 * Computes the best possible state for a single resource and stores the preference lists and
 * per-partition state maps in {@code output}.
 *
 * @param event              cluster event supplying the HelixManager and cluster status monitor attributes
 * @param cache              cluster data cache used to look up the ideal state
 * @param currentStateOutput current state passed through to the rebalancer and mapping calculator
 * @param resource           the resource being processed
 * @param output             receives the computed preference lists and partition states
 * @return the result of {@code checkBestPossibleStateCalculation} on success; {@code false} when
 *         no rebalancer or mapping calculator is available, or when the computation throws
 */
private boolean computeResourceBestPossibleState(ClusterEvent event, ClusterDataCache cache, CurrentStateOutput currentStateOutput, Resource resource, BestPossibleStateOutput output) {
    final String resourceName = resource.getResourceName();
    logger.debug("Processing resource:" + resourceName);

    // The ideal state may have been deleted; substitute an empty one that carries the
    // state model reference of the resource.
    IdealState idealState = cache.getIdealState(resourceName);
    if (idealState == null) {
        logger.info("resource:" + resourceName + " does not exist anymore");
        idealState = new IdealState(resourceName);
        idealState.setStateModelDefRef(resource.getStateModelDefRef());
    }

    final Rebalancer rebalancer = getRebalancer(idealState, resourceName, cache.isMaintenanceModeEnabled());
    final MappingCalculator mappingCalculator = getMappingCalculator(rebalancer, resourceName);

    // Nothing can be computed without both collaborators.
    if (rebalancer == null || mappingCalculator == null) {
        logger.error("Error computing assignment for resource " + resourceName + ". no rebalancer found. rebalancer: " + rebalancer + " mappingCaculator: " + mappingCalculator);
        return false;
    }

    if (rebalancer instanceof TaskRebalancer) {
        ClusterStatusMonitor statusMonitor = (ClusterStatusMonitor) event.getAttribute(AttributeName.clusterStatusMonitor.name());
        ((TaskRebalancer) rebalancer).setClusterStatusMonitor(statusMonitor);
    }

    ResourceAssignment assignment = null;
    try {
        HelixManager helixManager = event.getAttribute(AttributeName.helixmanager.name());
        rebalancer.init(helixManager);
        idealState = rebalancer.computeNewIdealState(resourceName, idealState, currentStateOutput, cache);
        output.setPreferenceLists(resourceName, idealState.getPreferenceLists());

        // The final assignment comes from the internal MappingCalculator interface.
        assignment = mappingCalculator.computeBestPossiblePartitionState(cache, idealState, resource, currentStateOutput);
        for (Partition partition : resource.getPartitions()) {
            output.setState(resourceName, partition, assignment.getReplicaMap(partition));
        }

        // Report whether the calculation completed successfully.
        return checkBestPossibleStateCalculation(idealState);
    } catch (Exception e) {
        logger.error("Error computing assignment for resource " + resourceName + ". Skipping.", e);
        // TODO : remove this part after debugging NPE
        StringBuilder diagnostics = new StringBuilder();
        diagnostics.append(String.format("HelixManager is null : %s\n", event.getAttribute("helixmanager") == null));
        diagnostics.append(String.format("Rebalancer is null : %s\n", rebalancer == null));
        diagnostics.append(String.format("Calculated idealState is null : %s\n", idealState == null));
        diagnostics.append(String.format("MappingCaculator is null : %s\n", mappingCalculator == null));
        diagnostics.append(String.format("PartitionAssignment is null : %s\n", assignment == null));
        diagnostics.append(String.format("Output is null : %s\n", output == null));
        logger.error(diagnostics.toString());
        // The computation threw; report failure.
        return false;
    }
}
Also used : AutoRebalancer(org.apache.helix.controller.rebalancer.AutoRebalancer) Rebalancer(org.apache.helix.controller.rebalancer.Rebalancer) SemiAutoRebalancer(org.apache.helix.controller.rebalancer.SemiAutoRebalancer) CustomRebalancer(org.apache.helix.controller.rebalancer.CustomRebalancer) MaintenanceRebalancer(org.apache.helix.controller.rebalancer.MaintenanceRebalancer) HelixManager(org.apache.helix.HelixManager) StageException(org.apache.helix.controller.pipeline.StageException) MappingCalculator(org.apache.helix.controller.rebalancer.internal.MappingCalculator)

Example 88 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class BestPossibleStateCalcStage method compute.

/**
 * Computes the best possible state for every resource in {@code resourceMap}, processing the
 * resources in the order defined by {@code ResourcePriority}.
 *
 * @param event              cluster event supplying the data cache, HelixManager and status monitor
 * @param resourceMap        resources to process, keyed by name
 * @param currentStateOutput current state handed to the per-resource computation
 * @return the output populated by the per-resource computation; resources whose computation
 *         failed are logged and counted, and reported through {@code updateRebalanceStatus}
 *         when the cache is not a task cache
 */
private BestPossibleStateOutput compute(ClusterEvent event, Map<String, Resource> resourceMap, CurrentStateOutput currentStateOutput) {
    ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());
    BestPossibleStateOutput output = new BestPossibleStateOutput();
    PriorityQueue<ResourcePriority> resourcePriorityQueue = new PriorityQueue<>();
    TaskDriver taskDriver = null;
    HelixManager helixManager = event.getAttribute(AttributeName.helixmanager.name());
    if (helixManager != null) {
        taskDriver = new TaskDriver(helixManager);
    }
    for (Resource resource : resourceMap.values()) {
        resourcePriorityQueue.add(new ResourcePriority(resource, cache.getIdealState(resource.getResourceName()), taskDriver));
    }
    final List<String> failureResources = new ArrayList<>();
    // PriorityQueue.iterator() makes no ordering guarantee, so iterating the queue would ignore
    // the priorities. poll() always removes the current head, giving true priority order.
    ResourcePriority head;
    while ((head = resourcePriorityQueue.poll()) != null) {
        Resource resource = head.getResource();
        if (!computeResourceBestPossibleState(event, cache, currentStateOutput, resource, output)) {
            failureResources.add(resource.getResourceName());
            logger.warn("Failed to calculate best possible states for " + resource.getResourceName());
        }
    }
    // Check and report if resource rebalance has failure
    if (!cache.isTaskCache()) {
        ClusterStatusMonitor clusterStatusMonitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
        updateRebalanceStatus(!failureResources.isEmpty(), helixManager, cache, clusterStatusMonitor, "Failed to calculate best possible states for " + failureResources.size() + " resources.");
    }
    return output;
}
Also used : HelixManager(org.apache.helix.HelixManager) ClusterStatusMonitor(org.apache.helix.monitoring.mbeans.ClusterStatusMonitor)

Example 89 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class CompatibilityCheckStage method process.

/**
 * Verifies that every live participant reports a Helix version the controller accepts.
 *
 * @param event cluster event that must carry the HelixManager and ClusterDataCache attributes
 * @throws StageException if either attribute is missing, or if a live instance reports a
 *                        version for which {@code isParticipantCompatible} returns false
 */
@Override
public void process(ClusterEvent event) throws Exception {
    final HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
    final ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());

    final boolean attributesPresent = manager != null && cache != null;
    if (!attributesPresent) {
        throw new StageException("Missing attributes in event:" + event + ". Requires HelixManager | DataCache");
    }

    final HelixManagerProperties properties = manager.getProperties();
    for (LiveInstance participant : cache.getLiveInstances().values()) {
        final String participantVersion = participant.getHelixVersion();
        if (properties.isParticipantCompatible(participantVersion)) {
            continue;
        }
        // The first incompatible participant stops the pipeline.
        final String errorMsg = "incompatible participant. pipeline will not continue. " + "controller: " + manager.getInstanceName()
            + ", controllerVersion: " + properties.getVersion()
            + ", minimumSupportedParticipantVersion: " + properties.getProperty("miminum_supported_version.participant")
            + ", participant: " + participant.getInstanceName()
            + ", participantVersion: " + participantVersion;
        LOG.error(errorMsg);
        throw new StageException(errorMsg);
    }
}
Also used : HelixManagerProperties(org.apache.helix.HelixManagerProperties) HelixManager(org.apache.helix.HelixManager) LiveInstance(org.apache.helix.model.LiveInstance) StageException(org.apache.helix.controller.pipeline.StageException)

Example 90 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class IntermediateStateCalcStage method validateMaxPartitionsPerInstance.

/**
 * Counts, per instance, the replicas assigned in {@code intermediateStateOutput} and aborts as
 * soon as any instance exceeds {@code maxPartitionPerInstance}.
 *
 * <p>Resources whose ideal state uses the Task state model are skipped, as are replicas whose
 * state is DROPPED. On violation the cluster is put into maintenance mode when a HelixManager is
 * attached to the event; otherwise an error is logged.
 *
 * @param event                   cluster event that may carry the HelixManager attribute
 * @param cache                   cluster data cache used for ideal state and cluster name lookups
 * @param intermediateStateOutput the assignment to validate
 * @param maxPartitionPerInstance upper bound on replicas per instance
 * @throws HelixException if an instance would hold more than {@code maxPartitionPerInstance} replicas
 */
private void validateMaxPartitionsPerInstance(ClusterEvent event, ClusterDataCache cache, IntermediateStateOutput intermediateStateOutput, int maxPartitionPerInstance) {
    final Map<String, PartitionStateMap> statesByResource = intermediateStateOutput.getResourceStatesMap();
    final Map<String, Integer> countsByInstance = new HashMap<>();

    for (Map.Entry<String, PartitionStateMap> resourceEntry : statesByResource.entrySet()) {
        final IdealState idealState = cache.getIdealState(resourceEntry.getKey());
        final boolean isTaskResource = idealState != null && idealState.getStateModelDefRef().equals(BuiltInStateModelDefinitions.Task.name());
        if (isTaskResource) {
            // Tasks are throttled by their own logic and are not counted here.
            continue;
        }

        final Map<Partition, Map<String, String>> statesByPartition = resourceEntry.getValue().getStateMap();
        for (Map<String, String> stateByInstance : statesByPartition.values()) {
            for (Map.Entry<String, String> replica : stateByInstance.entrySet()) {
                final String instance = replica.getKey();
                final String state = replica.getValue();
                // Replicas that are about to be dropped do not count.
                if (state.equals(HelixDefinedState.DROPPED.name())) {
                    continue;
                }

                final Integer previousCount = countsByInstance.get(instance);
                final int partitionCount = (previousCount == null ? 0 : previousCount) + 1;
                if (partitionCount <= maxPartitionPerInstance) {
                    countsByInstance.put(instance, partitionCount);
                    continue;
                }

                // Limit exceeded: pause the cluster if possible, then abort.
                HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
                String errMsg = String.format("Partition count to be assigned to instance %s is greater than %d. Stop rebalance and pause the cluster %s", instance, maxPartitionPerInstance, cache.getClusterName());
                if (manager == null) {
                    logger.error("Failed to pause cluster, HelixManager is not set!");
                } else {
                    manager.getClusterManagmentTool().enableMaintenanceMode(manager.getClusterName(), true, errMsg);
                }
                throw new HelixException(errMsg);
            }
        }
    }
}
Also used : HelixManager(org.apache.helix.HelixManager) PartitionStateMap(org.apache.helix.controller.common.PartitionStateMap) HelixException(org.apache.helix.HelixException)

Aggregations

HelixManager (org.apache.helix.HelixManager)105 Test (org.testng.annotations.Test)44 HelixDataAccessor (org.apache.helix.HelixDataAccessor)35 ZNRecord (org.apache.helix.ZNRecord)27 Message (org.apache.helix.model.Message)23 PropertyKey (org.apache.helix.PropertyKey)20 Date (java.util.Date)18 ZKHelixDataAccessor (org.apache.helix.manager.zk.ZKHelixDataAccessor)17 Builder (org.apache.helix.PropertyKey.Builder)16 ArrayList (java.util.ArrayList)14 HashMap (java.util.HashMap)12 ExternalView (org.apache.helix.model.ExternalView)11 NotificationContext (org.apache.helix.NotificationContext)10 LiveInstance (org.apache.helix.model.LiveInstance)10 IdealState (org.apache.helix.model.IdealState)9 List (java.util.List)8 Criteria (org.apache.helix.Criteria)8 HelixAdmin (org.apache.helix.HelixAdmin)8 ZKHelixManager (org.apache.helix.manager.zk.ZKHelixManager)8 StringWriter (java.io.StringWriter)7