Use of org.apache.helix.controller.rebalancer.topology.Node in project helix by apache: the class CrushRebalanceStrategy, method computePartitionAssignment.
/**
 * Compute the preference lists and (optional partition-state mapping) for the given resource.
 *
 * @param allNodes All instances
 * @param liveNodes List of live instances
 * @param currentMapping current replica mapping
 * @param clusterData cluster data
 * @return a ZNRecord keyed by the resource name whose list fields map each partition name to
 *         its ordered list of preferred instance names
 * @throws HelixException if a map can not be found
 */
@Override
public ZNRecord computePartitionAssignment(final List<String> allNodes, final List<String> liveNodes, final Map<String, Map<String, String>> currentMapping, ClusterDataCache clusterData) throws HelixException {
// Build the cluster topology tree from the instance configurations.
Map<String, InstanceConfig> configByInstance = clusterData.getInstanceConfigMap();
_clusterTopo = new Topology(allNodes, liveNodes, configByInstance, clusterData.getClusterConfig());
Node root = _clusterTopo.getRootNode();
Map<String, List<String>> preferenceLists = new HashMap<String, List<String>>();
for (String partitionName : _partitions) {
// The partition name's hash is the deterministic CRUSH placement input.
long input = partitionName.hashCode();
// apply the placement rules
List<Node> chosen = select(root, input, _replicas);
if (chosen.size() < _replicas) {
Log.error(String.format("Can not find enough node for resource %s partition %s, required %d, find %d", _resourceName, partitionName, _replicas, chosen.size()));
}
List<String> nodeNames = new ArrayList<String>();
for (Node node : chosen) {
nodeNames.add(node.getName());
}
preferenceLists.put(partitionName, nodeNames);
}
ZNRecord record = new ZNRecord(_resourceName);
record.setListFields(preferenceLists);
return record;
}
Use of org.apache.helix.controller.rebalancer.topology.Node in project helix by apache: the class MultiRoundCrushRebalanceStrategy, method recalculateWeight.
/**
 * Recompute per-node weights inside one fault zone based on how far each node's current
 * assignment count is from its weight-proportional target, and return a (possibly cloned)
 * zone tree to drive the next selection round.
 *
 * @param zone the fault zone whose leaf nodes are re-weighted
 * @param totalWeight sum of all node weights used to compute each node's fair share
 * @param totalPartition total number of partitions being placed in this zone
 * @param nodePartitionsMap current node -> assigned-partition-list mapping
 * @param partitions all partitions under consideration (sorted in place when trimming)
 * @param toRemovedMap output map collecting node -> partitions to strip from over-assigned nodes
 * @return a cloned zone tree with adjusted weights if any node still needs partitions,
 *         otherwise the original zone
 */
private Node recalculateWeight(Node zone, long totalWeight, int totalPartition, Map<Node, List<String>> nodePartitionsMap, List<String> partitions, Map<Node, List<String>> toRemovedMap) {
Map<String, Integer> adjustedWeights = new HashMap<>();
Set<String> doneNodes = new HashSet<>();
for (Node leaf : Topology.getAllLeafNodes(zone)) {
if (leaf.isFailed()) {
// Failed nodes take no further assignments.
doneNodes.add(leaf.getName());
continue;
}
// Target partition count proportional to this node's share of the total weight.
double share = ((double) leaf.getWeight()) / (double) totalWeight;
int target = (int) Math.floor(share * totalPartition);
List<String> assignedPartitions = nodePartitionsMap.get(leaf);
int assignedCount = assignedPartitions == null ? 0 : assignedPartitions.size();
if (assignedCount > target + 1) {
// Over-assigned beyond the allowed slack of one: mark the surplus for removal.
int surplus = assignedCount - target - 1;
Collections.sort(partitions);
toRemovedMap.put(leaf, new ArrayList<>(assignedPartitions.subList(0, surplus)));
}
int deficit = target - assignedCount;
if (deficit > 0) {
// Under-assigned: boost the node's weight so the next round favors it.
adjustedWeights.put(leaf.getName(), deficit * 10);
} else {
doneNodes.add(leaf.getName());
}
}
if (!adjustedWeights.isEmpty()) {
// iterate more
return _clusterTopo.clone(zone, adjustedWeights, doneNodes);
}
// If the newNodeWeight map is empty, it will use old root tree to calculate it.
return zone;
}
Use of org.apache.helix.controller.rebalancer.topology.Node in project helix by apache: the class MultiRoundCrushRebalanceStrategy, method computePartitionAssignment.
/**
 * Compute the preference lists and (optional partition-state mapping) for the given resource.
 * <p>
 * Three phases: (1) CRUSH-select one fault zone per replica for every partition,
 * (2) map each replica position in the preference list to a state from the state-count map,
 * (3) within each fault zone, pick a concrete node for every (partition, state) pair.
 *
 * @param allNodes All instances
 * @param liveNodes List of live instances
 * @param currentMapping current replica mapping
 * @param clusterData cluster data
 * @return a ZNRecord describing the computed partition-to-node assignment for the resource
 * @throws HelixException if a map can not be found
 */
@Override
public ZNRecord computePartitionAssignment(final List<String> allNodes, final List<String> liveNodes, final Map<String, Map<String, String>> currentMapping, ClusterDataCache clusterData) throws HelixException {
// Build the cluster topology tree from the instance configurations.
Map<String, InstanceConfig> instanceConfigMap = clusterData.getInstanceConfigMap();
_clusterTopo = new Topology(allNodes, liveNodes, instanceConfigMap, clusterData.getClusterConfig());
Node root = _clusterTopo.getRootNode();
// partition -> fault zones chosen for its replicas (index = replica position).
Map<String, List<Node>> zoneMapping = new HashMap<String, List<Node>>();
for (int i = 0; i < _partitions.size(); i++) {
String partitionName = _partitions.get(i);
long pData = partitionName.hashCode();
// select zones for this partition
List<Node> zones = select(root, _clusterTopo.getFaultZoneType(), pData, _replicas);
zoneMapping.put(partitionName, zones);
}
/* map the position in preference list to the state */
Map<Integer, String> idxStateMap = new HashMap<Integer, String>();
int i = 0;
for (Map.Entry<String, Integer> e : _stateCountMap.entrySet()) {
String state = e.getKey();
int count = e.getValue();
// Positions [i, i + count) in the preference list all carry this state.
for (int j = 0; j < count; j++) {
idxStateMap.put(i + j, state);
}
i += count;
}
// Final mapping <partition, state> -> list(node)
Map<String, Map<String, List<Node>>> partitionStateMapping = new HashMap<String, Map<String, List<Node>>>();
for (Node zone : _clusterTopo.getFaultZones()) {
// partition state -> list(partitions)
LinkedHashMap<String, List<String>> statePartitionMap = new LinkedHashMap<String, List<String>>();
// TODO: move this outside?
for (Map.Entry<String, List<Node>> e : zoneMapping.entrySet()) {
String partition = e.getKey();
List<Node> zones = e.getValue();
for (int k = 0; k < zones.size(); k++) {
if (zones.get(k).equals(zone)) {
// Replica position k determines the state this zone hosts for the partition.
String state = idxStateMap.get(k);
if (!statePartitionMap.containsKey(state)) {
statePartitionMap.put(state, new ArrayList<String>());
}
statePartitionMap.get(state).add(partition);
}
}
}
for (String state : _stateCountMap.keySet()) {
List<String> partitions = statePartitionMap.get(state);
if (partitions != null && !partitions.isEmpty()) {
// Resolve a concrete node inside this zone for each partition needing this state.
Map<String, Node> assignments = singleZoneMapping(zone, partitions);
for (String partition : assignments.keySet()) {
Node node = assignments.get(partition);
if (!partitionStateMapping.containsKey(partition)) {
partitionStateMapping.put(partition, new HashMap<String, List<Node>>());
}
Map<String, List<Node>> stateMapping = partitionStateMapping.get(partition);
if (!stateMapping.containsKey(state)) {
stateMapping.put(state, new ArrayList<Node>());
}
stateMapping.get(state).add(node);
}
}
}
}
return generateZNRecord(_resourceName, _partitions, partitionStateMapping);
}
Use of org.apache.helix.controller.rebalancer.topology.Node in project helix by apache: the class CRUSHPlacementAlgorithm, method select.
/**
 * CRUSH selection: deterministically pick up to {@code count} distinct nodes of the given
 * {@code type} in the subtree rooted at {@code parent}, driven by the hash {@code input}.
 * Candidates rejected by {@code nodePredicate}, already selected, or marked out
 * (failed / over the maximum assignment) trigger re-selection; after
 * MAX_LOOPBACK_COUNT loopbacks to the original parent the search for that replica is
 * abandoned, so the result may contain fewer than {@code count} nodes.
 *
 * @param parent root of the subtree to select from
 * @param input hash input (e.g. a partition name's hash) making selection deterministic
 * @param count number of nodes requested (one per replica rank r in [1, count])
 * @param type node type to stop the walk at (e.g. fault-zone or instance level)
 * @param nodePredicate extra filter a candidate node must pass
 * @return the selected nodes, possibly fewer than {@code count}
 */
public List<Node> select(Node parent, long input, int count, String type, Predicate<Node> nodePredicate) {
int childCount = parent.getChildrenCount(type);
if (childCount < count) {
logger.error(count + " nodes of type " + type + " were requested but the tree has only " + childCount + " nodes!");
}
List<Node> selected = new ArrayList<Node>(count);
// use the index stored in the map
Integer offset;
if (keepOffset) {
offset = roundOffset.get(input);
if (offset == null) {
offset = 0;
roundOffset.put(input, offset);
}
} else {
offset = 0;
}
// rPrime = r + offset + failure; it perturbs the hash so each retry explores a new candidate.
int rPrime = 0;
for (int r = 1; r <= count; r++) {
// number of rejections so far for this replica rank; bumps rPrime on each retry
int failure = 0;
// number of times we had to loop back to the origin
int loopbackCount = 0;
boolean escape = false;
boolean retryOrigin;
Node out = null;
do {
// initialize at the outset
retryOrigin = false;
Node in = parent;
Set<Node> rejected = new HashSet<Node>();
boolean retryNode;
do {
// initialize at the outset
retryNode = false;
rPrime = r + offset + failure;
logger.trace("{}.select({}, {})", new Object[] { in, input, rPrime });
Selector selector = new Selector(in);
out = selector.select(input, rPrime);
if (!out.getType().equalsIgnoreCase(type)) {
logger.trace("selected output {} for data {} didn't match the type {}: walking down " + "the hierarchy...", new Object[] { out, input, type });
// walk down the hierarchy
in = out;
// stay within the node and walk down the tree
retryNode = true;
} else {
// type matches
boolean predicateRejected = !nodePredicate.apply(out);
if (selected.contains(out) || predicateRejected) {
if (predicateRejected) {
logger.trace("{} was rejected by the node predicate for data {}: rejecting and " + "increasing rPrime", out, input);
rejected.add(out);
} else {
// already selected
logger.trace("{} was already selected for data {}: rejecting and increasing rPrime", out, input);
}
// case we should loop back to the origin and start over
if (allChildNodesEliminated(in, selected, rejected)) {
logger.trace("all child nodes of {} have been eliminated", in);
if (loopbackCount == MAX_LOOPBACK_COUNT) {
// we looped back the maximum times we specified; we give up search, and exit
escape = true;
break;
}
loopbackCount++;
logger.trace("looping back to the original parent node ({})", parent);
retryOrigin = true;
} else {
// go back and reselect on the same parent
retryNode = true;
}
failure++;
} else if (nodeIsOut(out)) {
logger.trace("{} is marked as out (failed or over the maximum assignment) for data " + "{}! looping back to the original parent node", out, input);
failure++;
if (loopbackCount == MAX_LOOPBACK_COUNT) {
// we looped back the maximum times we specified; we give up search, and exit
escape = true;
break;
}
loopbackCount++;
// re-selection on the same parent is detrimental in case of node failure: loop back
// to the origin
retryOrigin = true;
} else {
// we got a successful selection
break;
}
}
} while (retryNode);
} while (retryOrigin);
if (escape) {
// cannot find a node under this parent; return a smaller set than was intended
logger.debug("we could not select a node for data {} under parent {}; a smaller data set " + "than is requested will be returned", input, parent);
continue;
}
logger.trace("{} was selected for data {}", out, input);
selected.add(out);
}
// Persist the last rPrime so a later round for the same input resumes past prior retries.
if (keepOffset) {
roundOffset.put(input, rPrime);
}
return selected;
}
Aggregations