use of org.apache.helix.HelixManager in project helix by apache.
the class AgentStateModel method genericStateTransitionHandler.
/**
 * Handles every state transition by running an external command.
 *
 * <p>The command, working directory, timeout and pid file are read from the message record
 * first. When the message carries no command, the same attributes are looked up in the
 * resource-scope configuration and, if still missing, in the cluster-scope configuration,
 * using keys built from the transition's from/to states.
 *
 * <p>A command equal to the NOP attribute name is logged and skipped. Otherwise the command
 * string is split on whitespace into the executable and its arguments and run with the
 * configured timeout (0, also used when the timeout is absent or not a number, means wait
 * for ever). If a pid file is configured and yields a pid, a monitor thread is started for it.
 *
 * @param message the transition message; supplies the from/to states, the resource name and
 *        optionally the command attributes
 * @param context the notification context; supplies the HelixManager
 * @throws Exception if no command can be found for the transition, if a non-NOP command has
 *         no working directory configured, or if executing the command fails
 */
@Transition(to = "*", from = "*")
public void genericStateTransitionHandler(Message message, NotificationContext context) throws Exception {
  // first try get command from message
  String cmd = message.getRecord().getSimpleField(CommandAttribute.COMMAND.getName());
  String workingDir = message.getRecord().getSimpleField(CommandAttribute.WORKING_DIR.getName());
  String timeout = message.getRecord().getSimpleField(CommandAttribute.TIMEOUT.getName());
  String pidFile = message.getRecord().getSimpleField(CommandAttribute.PID_FILE.getName());

  HelixManager manager = context.getManager();
  String clusterName = manager.getClusterName();
  String fromState = message.getFromState();
  String toState = message.getToState();

  // construct keys for command-config
  String cmdKey = buildKey(fromState, toState, CommandAttribute.COMMAND);
  String workingDirKey = buildKey(fromState, toState, CommandAttribute.WORKING_DIR);
  String timeoutKey = buildKey(fromState, toState, CommandAttribute.TIMEOUT);
  String pidFileKey = buildKey(fromState, toState, CommandAttribute.PID_FILE);
  List<String> cmdConfigKeys = Arrays.asList(cmdKey, workingDirKey, timeoutKey, pidFileKey);

  // read command from resource-scope configures
  if (cmd == null) {
    HelixConfigScope resourceScope = new HelixConfigScopeBuilder(ConfigScopeProperty.RESOURCE).forCluster(clusterName).forResource(message.getResourceName()).build();
    Map<String, String> cmdKeyValueMap = manager.getConfigAccessor().get(resourceScope, cmdConfigKeys);
    if (cmdKeyValueMap != null) {
      cmd = cmdKeyValueMap.get(cmdKey);
      workingDir = cmdKeyValueMap.get(workingDirKey);
      timeout = cmdKeyValueMap.get(timeoutKey);
      pidFile = cmdKeyValueMap.get(pidFileKey);
    }
  }

  // if resource-scope doesn't contain command, fall back to cluster-scope configures
  if (cmd == null) {
    HelixConfigScope clusterScope = new HelixConfigScopeBuilder(ConfigScopeProperty.CLUSTER).forCluster(clusterName).build();
    Map<String, String> cmdKeyValueMap = manager.getConfigAccessor().get(clusterScope, cmdConfigKeys);
    if (cmdKeyValueMap != null) {
      cmd = cmdKeyValueMap.get(cmdKey);
      workingDir = cmdKeyValueMap.get(workingDirKey);
      timeout = cmdKeyValueMap.get(timeoutKey);
      pidFile = cmdKeyValueMap.get(pidFileKey);
    }
  }

  if (cmd == null) {
    throw new Exception("Unable to find command for transition from:" + message.getFromState() + " to:" + message.getToState());
  }
  _logger.info("Executing command: " + cmd + ", using workingDir: " + workingDir + ", timeout: " + timeout + ", on " + manager.getInstanceName());

  // skip nop command
  if (cmd.equals(CommandAttribute.NOP.getName())) {
    return;
  }

  // new File((String) null) throws a bare NullPointerException; report the missing
  // configuration explicitly instead. Checked after the NOP return because a NOP
  // command never needs a working directory.
  if (workingDir == null) {
    throw new Exception("Unable to find working directory for command: " + cmd + " for transition from:" + fromState + " to:" + toState);
  }

  // split the cmd to actual cmd and args[]
  String[] cmdSplits = cmd.trim().split("\\s+");
  String cmdValue = cmdSplits[0];
  String[] args = Arrays.copyOfRange(cmdSplits, 1, cmdSplits.length);

  // get the command-execution timeout
  // 0 means wait for ever
  long timeoutValue = 0;
  if (timeout != null) {
    try {
      timeoutValue = Long.parseLong(timeout);
    } catch (NumberFormatException e) {
      // OK to use 0, but make the misconfiguration visible since 0 means no timeout at all
      _logger.warn("Invalid timeout value: " + timeout + " for command: " + cmd + ", will wait for ever");
    }
  }

  ExternalCommand externalCmd = ExternalCommand.executeWithTimeout(new File(workingDir), cmdValue, timeoutValue, args);
  int exitValue = externalCmd.exitValue();
  if (_logger.isDebugEnabled()) {
    _logger.debug("command: " + cmd + ", exitValue: " + exitValue + " output:\n" + externalCmd.getStringOutput());
  }

  // monitor pid if pidFile exists
  if (pidFile == null) {
    // no pid to monitor
    return;
  }
  String pidFileValue = instantiateByMessage(pidFile, message);
  String pid = SystemUtil.getPidFromFile(new File(pidFileValue));
  if (pid != null) {
    new ProcessMonitorThread(pid).start();
  }
}
use of org.apache.helix.HelixManager in project helix by apache.
the class BestPossibleStateCalcStage method computeResourceBestPossibleState.
/**
 * Computes the best possible state for a single resource and records it in {@code output}.
 *
 * <p>If the cache has no ideal state for the resource, an empty one is created that carries
 * only the resource's state model definition reference. The rebalancer then computes a new
 * ideal state, whose preference lists are stored in {@code output}, and the mapping
 * calculator produces the per-partition replica map that is stored for every partition of
 * the resource.
 *
 * @param event the cluster event; supplies the HelixManager and the cluster status monitor
 * @param cache the cluster data cache
 * @param currentStateOutput the current state of the cluster
 * @param resource the resource to compute
 * @param output receives the preference lists and partition states
 * @return the result of {@code checkBestPossibleStateCalculation} on success; {@code false}
 *         when no rebalancer or mapping calculator is found or when the computation throws
 */
private boolean computeResourceBestPossibleState(ClusterEvent event, ClusterDataCache cache, CurrentStateOutput currentStateOutput, Resource resource, BestPossibleStateOutput output) {
  final String resourceName = resource.getResourceName();
  logger.debug("Processing resource:" + resourceName);

  // The ideal state may already be deleted; fall back to an empty one that only
  // knows the state model definition taken from the resource.
  IdealState idealState = cache.getIdealState(resourceName);
  if (idealState == null) {
    logger.info("resource:" + resourceName + " does not exist anymore");
    idealState = new IdealState(resourceName);
    idealState.setStateModelDefRef(resource.getStateModelDefRef());
  }

  Rebalancer rebalancer = getRebalancer(idealState, resourceName, cache.isMaintenanceModeEnabled());
  MappingCalculator mappingCalculator = getMappingCalculator(rebalancer, resourceName);

  // Nothing can be computed without both collaborators.
  if (rebalancer == null || mappingCalculator == null) {
    logger.error("Error computing assignment for resource " + resourceName + ". no rebalancer found. rebalancer: " + rebalancer + " mappingCaculator: " + mappingCalculator);
    return false;
  }

  if (rebalancer instanceof TaskRebalancer) {
    ClusterStatusMonitor statusMonitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
    ((TaskRebalancer) rebalancer).setClusterStatusMonitor(statusMonitor);
  }

  ResourceAssignment partitionStateAssignment = null;
  try {
    HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
    rebalancer.init(manager);
    idealState = rebalancer.computeNewIdealState(resourceName, idealState, currentStateOutput, cache);
    output.setPreferenceLists(resourceName, idealState.getPreferenceLists());

    // The internal MappingCalculator interface produces the final assignment.
    partitionStateAssignment = mappingCalculator.computeBestPossiblePartitionState(cache, idealState, resource, currentStateOutput);
    for (Partition partition : resource.getPartitions()) {
      output.setState(resourceName, partition, partitionStateAssignment.getReplicaMap(partition));
    }

    // Report whether the calculation completed successfully.
    return checkBestPossibleStateCalculation(idealState);
  } catch (Exception e) {
    logger.error("Error computing assignment for resource " + resourceName + ". Skipping.", e);
    // TODO : remove this part after debugging NPE
    String diagnostics = String.format(
        "HelixManager is null : %s\n"
            + "Rebalancer is null : %s\n"
            + "Calculated idealState is null : %s\n"
            + "MappingCaculator is null : %s\n"
            + "PartitionAssignment is null : %s\n"
            + "Output is null : %s\n",
        event.getAttribute("helixmanager") == null,
        rebalancer == null,
        idealState == null,
        mappingCalculator == null,
        partitionStateAssignment == null,
        output == null);
    logger.error(diagnostics);
    // The computation threw, so the resource counts as failed.
    return false;
  }
}
use of org.apache.helix.HelixManager in project helix by apache.
the class BestPossibleStateCalcStage method compute.
/**
 * Computes the best possible state for every resource in {@code resourceMap}, visiting the
 * resources in the order defined by {@code ResourcePriority}'s natural ordering.
 *
 * <p>Resources whose computation fails are collected and logged. Unless the cache is a task
 * cache, the overall rebalance status is reported through {@code updateRebalanceStatus}.
 *
 * @param event the cluster event; supplies the data cache, HelixManager and status monitor
 * @param resourceMap the resources to compute, keyed by name
 * @param currentStateOutput the current state of the cluster
 * @return the accumulated best possible state output
 */
private BestPossibleStateOutput compute(ClusterEvent event, Map<String, Resource> resourceMap, CurrentStateOutput currentStateOutput) {
  ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());
  BestPossibleStateOutput output = new BestPossibleStateOutput();

  PriorityQueue<ResourcePriority> resourcePriorityQueue = new PriorityQueue<>();
  TaskDriver taskDriver = null;
  HelixManager helixManager = event.getAttribute(AttributeName.helixmanager.name());
  if (helixManager != null) {
    taskDriver = new TaskDriver(helixManager);
  }
  for (Resource resource : resourceMap.values()) {
    resourcePriorityQueue.add(new ResourcePriority(resource, cache.getIdealState(resource.getResourceName()), taskDriver));
  }

  final List<String> failureResources = new ArrayList<>();
  // PriorityQueue.iterator() makes no ordering guarantee; only poll() returns the
  // elements in priority order, so drain the (local) queue instead of iterating it.
  ResourcePriority next;
  while ((next = resourcePriorityQueue.poll()) != null) {
    Resource resource = next.getResource();
    if (!computeResourceBestPossibleState(event, cache, currentStateOutput, resource, output)) {
      failureResources.add(resource.getResourceName());
      logger.warn("Failed to calculate best possible states for " + resource.getResourceName());
    }
  }

  // Check and report if resource rebalance has failure
  if (!cache.isTaskCache()) {
    ClusterStatusMonitor clusterStatusMonitor = event.getAttribute(AttributeName.clusterStatusMonitor.name());
    updateRebalanceStatus(!failureResources.isEmpty(), helixManager, cache, clusterStatusMonitor, "Failed to calculate best possible states for " + failureResources.size() + " resources.");
  }
  return output;
}
use of org.apache.helix.HelixManager in project helix by apache.
the class CompatibilityCheckStage method process.
/**
 * Verifies that every live participant runs a Helix version the controller accepts.
 *
 * @param event the cluster event; must carry the HelixManager and the cluster data cache
 * @throws StageException if either attribute is missing from the event, or if any live
 *         instance reports a version that the manager's properties reject
 */
@Override
public void process(ClusterEvent event) throws Exception {
  HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
  ClusterDataCache cache = event.getAttribute(AttributeName.ClusterDataCache.name());
  if (manager == null || cache == null) {
    throw new StageException("Missing attributes in event:" + event + ". Requires HelixManager | DataCache");
  }

  HelixManagerProperties properties = manager.getProperties();
  Map<String, LiveInstance> liveInstanceMap = cache.getLiveInstances();
  for (LiveInstance liveInstance : liveInstanceMap.values()) {
    String participantVersion = liveInstance.getHelixVersion();
    if (properties.isParticipantCompatible(participantVersion)) {
      continue;
    }

    // The first incompatible participant aborts the pipeline.
    StringBuilder errorMsg = new StringBuilder("incompatible participant. pipeline will not continue. ");
    errorMsg.append("controller: ").append(manager.getInstanceName());
    errorMsg.append(", controllerVersion: ").append(properties.getVersion());
    errorMsg.append(", minimumSupportedParticipantVersion: ").append(properties.getProperty("miminum_supported_version.participant"));
    errorMsg.append(", participant: ").append(liveInstance.getInstanceName());
    errorMsg.append(", participantVersion: ").append(participantVersion);

    String text = errorMsg.toString();
    LOG.error(text);
    throw new StageException(text);
  }
}
use of org.apache.helix.HelixManager in project helix by apache.
the class IntermediateStateCalcStage method validateMaxPartitionsPerInstance.
/**
 * Checks that no instance is assigned more than {@code maxPartitionPerInstance} replicas
 * across all non-task resources in the intermediate state.
 *
 * <p>Replicas in the DROPPED state are not counted. When the limit is exceeded the cluster
 * is put into maintenance mode through the HelixManager (if one is set on the event) and a
 * {@link HelixException} is thrown.
 *
 * @param event the cluster event; supplies the HelixManager
 * @param cache the cluster data cache; supplies ideal states and the cluster name
 * @param intermediateStateOutput the intermediate state to validate
 * @param maxPartitionPerInstance the largest number of replicas allowed on one instance
 * @throws HelixException if any instance exceeds the limit
 */
private void validateMaxPartitionsPerInstance(ClusterEvent event, ClusterDataCache cache, IntermediateStateOutput intermediateStateOutput, int maxPartitionPerInstance) {
  Map<String, Integer> instancePartitionCounts = new HashMap<>();

  for (Map.Entry<String, PartitionStateMap> resourceEntry : intermediateStateOutput.getResourceStatesMap().entrySet()) {
    IdealState idealState = cache.getIdealState(resourceEntry.getKey());
    boolean isTaskResource = idealState != null && idealState.getStateModelDefRef().equals(BuiltInStateModelDefinitions.Task.name());
    if (isTaskResource) {
      // Task has its own throttling logic, so it is not counted here.
      continue;
    }

    for (Map<String, String> replicaStates : resourceEntry.getValue().getStateMap().values()) {
      for (Map.Entry<String, String> replica : replicaStates.entrySet()) {
        String instance = replica.getKey();
        // A replica that is about to be dropped does not occupy the instance.
        if (replica.getValue().equals(HelixDefinedState.DROPPED.name())) {
          continue;
        }

        Integer previousCount = instancePartitionCounts.get(instance);
        int partitionCount = (previousCount == null ? 0 : previousCount) + 1;
        if (partitionCount <= maxPartitionPerInstance) {
          instancePartitionCounts.put(instance, partitionCount);
          continue;
        }

        // Limit exceeded: pause the cluster when possible, then abort the rebalance.
        HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
        String errMsg = String.format("Partition count to be assigned to instance %s is greater than %d. Stop rebalance and pause the cluster %s", instance, maxPartitionPerInstance, cache.getClusterName());
        if (manager == null) {
          logger.error("Failed to pause cluster, HelixManager is not set!");
        } else {
          manager.getClusterManagmentTool().enableMaintenanceMode(manager.getClusterName(), true, errMsg);
        }
        throw new HelixException(errMsg);
      }
    }
  }
}
Aggregations