use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.
the class RSGroupInfoManagerImpl method balanceRSGroup.
@Override
public BalanceResponse balanceRSGroup(String groupName, BalanceRequest request) throws IOException {
ServerManager serverManager = masterServices.getServerManager();
LoadBalancer balancer = masterServices.getLoadBalancer();
getRSGroupInfo(groupName);
BalanceResponse.Builder responseBuilder = BalanceResponse.newBuilder();
synchronized (balancer) {
// If the balancer switch is off and this is not a dry run, don't run the balancer.
if (!masterServices.isBalancerOn() && !request.isDryRun()) {
return responseBuilder.build();
}
// Only allow one balance run at a time.
Map<String, RegionState> groupRIT = rsGroupGetRegionsInTransition(groupName);
if (groupRIT.size() > 0 && !request.isIgnoreRegionsInTransition()) {
LOG.debug("Not running balancer because {} region(s) in transition: {}", groupRIT.size(), StringUtils.abbreviate(masterServices.getAssignmentManager().getRegionStates().getRegionsInTransition().toString(), 256));
return responseBuilder.build();
}
if (serverManager.areDeadServersInProgress()) {
LOG.debug("Not running balancer because processing dead regionserver(s): {}", serverManager.getDeadServers());
return responseBuilder.build();
}
// We balance per group instead of per table
Map<TableName, Map<ServerName, List<RegionInfo>>> assignmentsByTable = getRSGroupAssignmentsByTable(masterServices.getTableStateManager(), groupName);
List<RegionPlan> plans = balancer.balanceCluster(assignmentsByTable);
boolean balancerRan = !plans.isEmpty();
responseBuilder.setBalancerRan(balancerRan).setMovesCalculated(plans.size());
if (balancerRan && !request.isDryRun()) {
LOG.info("RSGroup balance {} starting with plan count: {}", groupName, plans.size());
List<RegionPlan> executed = masterServices.executeRegionPlansWithThrottling(plans);
responseBuilder.setMovesExecuted(executed.size());
LOG.info("RSGroup balance " + groupName + " completed");
}
return responseBuilder.build();
}
}
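The server-side method above is normally driven from the client. Below is a minimal, hypothetical sketch (not part of the HBase source shown here) of issuing a dry-run RSGroup balance through the Admin API; it assumes an HBase version that exposes Admin#balanceRSGroup(String, BalanceRequest) and the BalanceRequest/BalanceResponse accessors used below, and "my_group" is a placeholder group name.
// Minimal sketch, assuming the client-side Admin#balanceRSGroup(String, BalanceRequest)
// overload and the accessors below exist in your HBase version.
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.BalanceRequest;
import org.apache.hadoop.hbase.client.BalanceResponse;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
public class RSGroupDryRunExample {
public static void main(String[] args) throws Exception {
try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
Admin admin = conn.getAdmin()) {
// Dry run: the balancer computes plans but does not execute any moves.
BalanceRequest request = BalanceRequest.newBuilder().setDryRun(true).build();
BalanceResponse response = admin.balanceRSGroup("my_group", request); // "my_group" is a placeholder
System.out.println("balancerRan=" + response.isBalancerRan()
+ ", movesCalculated=" + response.getMovesCalculated()
+ ", movesExecuted=" + response.getMovesExecuted());
}
}
}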
use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.
the class SimpleLoadBalancer method balanceOverall.
/**
* If we need to balance overall, we need one more round to peel off one region from each
* server that is at the max. Together with the other regions left to be assigned, we distribute
* all of regionsToMove to the RS that have fewer regions in the whole cluster scope.
*/
private void balanceOverall(List<RegionPlan> regionsToReturn, Map<ServerName, BalanceInfo> serverBalanceInfo, boolean fetchFromTail, MinMaxPriorityQueue<RegionPlan> regionsToMove, int max, int min) {
// Step 1.
// A map to record the plan we have already got as status quo, in order to resolve a cyclic
// assignment pair,
// e.g. plan 1: A -> B, plan 2: B ->C => resolve plan1 to A -> C, remove plan2
Map<ServerName, List<Integer>> returnMap = new HashMap<>();
for (int i = 0; i < regionsToReturn.size(); i++) {
List<Integer> pos = returnMap.get(regionsToReturn.get(i).getDestination());
if (pos == null) {
pos = new ArrayList<>();
returnMap.put(regionsToReturn.get(i).getDestination(), pos);
}
pos.add(i);
}
// Step 2. Peel off one region from each RS that currently holds the max number of regions.
// Each RS should have either max or min numbers of regions for this table.
for (int i = 0; i < serverLoadList.size(); i++) {
ServerAndLoad serverload = serverLoadList.get(i);
BalanceInfo balanceInfo = serverBalanceInfo.get(serverload.getServerName());
if (balanceInfo == null) {
continue;
}
setLoad(serverLoadList, i, balanceInfo.getNumRegionsAdded());
if (balanceInfo.getHriList().size() + balanceInfo.getNumRegionsAdded() == max) {
RegionInfo hriToPlan;
if (balanceInfo.getHriList().isEmpty()) {
LOG.debug("During balanceOverall, we found " + serverload.getServerName() + " has no RegionInfo, no operation needed");
continue;
} else if (balanceInfo.getNextRegionForUnload() >= balanceInfo.getHriList().size()) {
continue;
} else {
hriToPlan = balanceInfo.getHriList().get(balanceInfo.getNextRegionForUnload());
}
RegionPlan maxPlan = new RegionPlan(hriToPlan, serverload.getServerName(), null);
regionsToMove.add(maxPlan);
setLoad(serverLoadList, i, -1);
} else if (balanceInfo.getHriList().size() + balanceInfo.getNumRegionsAdded() > max || balanceInfo.getHriList().size() + balanceInfo.getNumRegionsAdded() < min) {
LOG.warn("Encounter incorrect region numbers after calculating move plan during balanceOverall, " + "for this table, " + serverload.getServerName() + " originally has " + balanceInfo.getHriList().size() + " regions and " + balanceInfo.getNumRegionsAdded() + " regions have been added. Yet, max =" + max + ", min =" + min + // should not happen
". Thus stop balance for this table");
return;
}
}
// Step 3. Sort the serverLoadList, the ArrayList holding the overall load of each server.
// We only need to assign the regionsToMove to
// the first n = regionsToMove.size() RS that have the least load.
Collections.sort(serverLoadList, new Comparator<ServerAndLoad>() {
@Override
public int compare(ServerAndLoad s1, ServerAndLoad s2) {
if (s1.getLoad() == s2.getLoad()) {
return 0;
} else {
return (s1.getLoad() > s2.getLoad()) ? 1 : -1;
}
}
});
// Step 4.
// Preparation before assign out all regionsToMove.
// We need to remove any plan whose source RS equals its destination RS,
// since the source RS belongs to the n least loaded RS.
int assignLength = regionsToMove.size();
// A structure that helps map a ServerName to its load and index in serverLoadList
Map<ServerName, Pair<ServerAndLoad, Integer>> SnLoadMap = new HashMap<>();
for (int i = 0; i < serverLoadList.size(); i++) {
SnLoadMap.put(serverLoadList.get(i).getServerName(), new Pair<>(serverLoadList.get(i), i));
}
Pair<ServerAndLoad, Integer> shredLoad;
// A list to mark the plans in regionsToMove that should be removed
List<RegionPlan> planToRemoveList = new ArrayList<>();
// A structure to record how many times a server becomes the source of a plan, from
// regionsToMove.
Map<ServerName, Integer> sourceMap = new HashMap<>();
// But keep in mind that only the first plan from such an RS is removed; later plans from the same RS are kept.
for (RegionPlan plan : regionsToMove) {
// the source RS's load and index in ServerLoadList
shredLoad = SnLoadMap.get(plan.getSource());
if (!sourceMap.containsKey(plan.getSource())) {
sourceMap.put(plan.getSource(), 0);
}
sourceMap.put(plan.getSource(), sourceMap.get(plan.getSource()) + 1);
if (shredLoad.getSecond() < assignLength && sourceMap.get(plan.getSource()) == 1) {
planToRemoveList.add(plan);
// When a plan is marked to be removed, its region count should be added back to the source RS
setLoad(serverLoadList, shredLoad.getSecond(), 1);
}
}
// Remove the marked plans from regionsToMove one by one, due to the fact that regionsToMove is a MinMaxPriorityQueue.
for (RegionPlan planToRemove : planToRemoveList) {
regionsToMove.remove(planToRemove);
}
// Assign the regionsToMove to the first assignLength least loaded RS, one region each, while keeping table level balanced.
for (int i = 0; i < assignLength; i++) {
// Skip an RS that is also the source of a plan; its plan was removed and its region count added back in the previous step.
if (sourceMap.containsKey(serverLoadList.get(i).getServerName())) {
continue;
}
addRegionPlan(regionsToMove, fetchFromTail, serverLoadList.get(i).getServerName(), regionsToReturn);
setLoad(serverLoadList, i, 1);
// resolve a possible cyclic assignment pair if we just produced one:
// e.g. plan1: A -> B, plan2: B -> C => resolve plan1 to A -> C and remove plan2
List<Integer> pos = returnMap.get(regionsToReturn.get(regionsToReturn.size() - 1).getSource());
if (pos != null && pos.size() != 0) {
regionsToReturn.get(pos.get(pos.size() - 1)).setDestination(regionsToReturn.get(regionsToReturn.size() - 1).getDestination());
pos.remove(pos.size() - 1);
regionsToReturn.remove(regionsToReturn.size() - 1);
}
}
// Done balance overall
}
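The Step 1 comment above describes resolving a cyclic assignment pair (plan1: A -> B, plan2: B -> C collapses into a single A -> C move). The following self-contained sketch, using plain strings instead of RegionPlan and ServerName for brevity, illustrates that same returnMap trick; it is an illustration only, not HBase code.
// Simplified illustration of the cyclic-plan resolution used in balanceOverall:
// plan1 A -> B plus a new plan B -> C are collapsed into A -> C, so the region moves once.
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class CyclicPlanResolution {
static final class Plan {
String source;
String destination;
Plan(String source, String destination) { this.source = source; this.destination = destination; }
@Override
public String toString() { return source + " -> " + destination; }
}
public static void main(String[] args) {
List<Plan> plans = new ArrayList<>();
plans.add(new Plan("A", "B")); // plan1
// Index the existing plans by destination, mirroring balanceOverall's returnMap.
Map<String, List<Integer>> returnMap = new HashMap<>();
for (int i = 0; i < plans.size(); i++) {
returnMap.computeIfAbsent(plans.get(i).destination, k -> new ArrayList<>()).add(i);
}
// A new plan B -> C is produced; B is already the destination of plan1.
Plan newPlan = new Plan("B", "C");
plans.add(newPlan);
List<Integer> pos = returnMap.get(newPlan.source);
if (pos != null && !pos.isEmpty()) {
// Redirect the earlier plan to the new destination and drop the new plan.
plans.get(pos.get(pos.size() - 1)).destination = newPlan.destination;
pos.remove(pos.size() - 1);
plans.remove(plans.size() - 1);
}
System.out.println(plans); // prints [A -> C]
}
}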
use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.
the class SimpleLoadBalancer method balanceTable.
/**
* Generate a global load balancing plan according to the specified map of
* server information to the most loaded regions of each server.
*
* The load balancing invariant is that all servers are within 1 region of the
* average number of regions per server. If the average is an integer number,
* all servers will be balanced to the average. Otherwise, all servers will
* have either floor(average) or ceiling(average) regions.
*
* HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that
* we can fetch from both ends of the queue.
* At the beginning, we check whether there was empty region server
* just discovered by Master. If so, we alternately choose new / old
* regions from head / tail of regionsToMove, respectively. This alternation
* avoids clustering young regions on the newly discovered region server.
* Otherwise, we choose new regions from head of regionsToMove.
*
* Another improvement from HBASE-3609 is that we assign regions from
* regionsToMove to underloaded servers in round-robin fashion.
* Previously one underloaded server would be filled before we move onto
* the next underloaded server, leading to clustering of young regions.
*
* Finally, we randomly shuffle underloaded servers so that they receive
* offloaded regions relatively evenly across calls to balanceCluster().
*
* The algorithm is currently implemented as such:
*
* <ol>
* <li>Determine the two valid numbers of regions each server should have,
* <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
*
* <li>Iterate down the most loaded servers, shedding regions from each so
* each server hosts exactly <b>MAX</b> regions. Stop once you reach a
* server that already has <= <b>MAX</b> regions.
* <p>
* Order the regions to move from most recent to least.
*
* <li>Iterate down the least loaded servers, assigning regions so each server
* has exactly <b>MIN</b> regions. Stop once you reach a server that
* already has >= <b>MIN</b> regions.
*
* Regions being assigned to underloaded servers are those that were shed
* in the previous step. It is possible that there were not enough
* regions shed to fill each underloaded server to <b>MIN</b>. If so we
* end up with a number of regions required to do so, <b>neededRegions</b>.
*
 * It is also possible that we were able to fill each underloaded server but ended
 * up with regions that were unassigned from overloaded servers and that
 * still do not have an assignment.
*
* If neither of these conditions hold (no regions needed to fill the
* underloaded servers, no regions leftover from overloaded servers),
* we are done and return. Otherwise we handle these cases below.
*
* <li>If <b>neededRegions</b> is non-zero (still have underloaded servers),
 * we iterate the most loaded servers again, shedding a single region from
* each (this brings them from having <b>MAX</b> regions to having
* <b>MIN</b> regions).
*
* <li>We now definitely have more regions that need assignment, either from
* the previous step or from the original shedding from overloaded servers.
* Iterate the least loaded servers filling each to <b>MIN</b>.
*
* <li>If we still have more regions that need assignment, again iterate the
* least loaded servers, this time giving each one (filling them to
* <b>MAX</b>) until we run out.
*
* <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions.
*
* In addition, any server hosting >= <b>MAX</b> regions is guaranteed
* to end up with <b>MAX</b> regions at the end of the balancing. This
* ensures the minimal number of regions possible are moved.
* </ol>
*
* TODO: We can at-most reassign the number of regions away from a particular
* server to be how many they report as most loaded.
* Should we just keep all assignment in memory? Any objections?
* Does this mean we need HeapSize on HMaster? Or just careful monitor?
* (current thinking is we will hold all assignments in memory)
*
* @param loadOfOneTable Map of regionservers and their load/region information to
* a list of their most loaded regions
* @return a list of regions to be moved, including source and destination,
* or null if cluster is already balanced
*/
@Override
protected List<RegionPlan> balanceTable(TableName tableName, Map<ServerName, List<RegionInfo>> loadOfOneTable) {
long startTime = EnvironmentEdgeManager.currentTime();
// construct a Cluster object with clusterMap and rest of the
// argument as defaults
BalancerClusterState c = new BalancerClusterState(loadOfOneTable, null, this.regionFinder, this.rackManager);
if (!needsBalance(c) && !this.overallNeedsBalance()) {
return null;
}
ClusterLoadState cs = new ClusterLoadState(loadOfOneTable);
int numServers = cs.getNumServers();
NavigableMap<ServerAndLoad, List<RegionInfo>> serversByLoad = cs.getServersByLoad();
int numRegions = cs.getNumRegions();
float average = cs.getLoadAverage();
int max = (int) Math.ceil(average);
int min = (int) average;
// Using to check balance result.
StringBuilder strBalanceParam = new StringBuilder();
strBalanceParam.append("Balance parameter: numRegions=").append(numRegions).append(", numServers=").append(numServers).append(", max=").append(max).append(", min=").append(min);
LOG.debug(strBalanceParam.toString());
// Balance the cluster
// TODO: Look at data block locality or a more complex load to do this
MinMaxPriorityQueue<RegionPlan> regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create();
List<RegionPlan> regionsToReturn = new ArrayList<>();
// Walk down most loaded, pruning each to the max
int serversOverloaded = 0;
// flag used to fetch regions from head and tail of list, alternately
boolean fetchFromTail = false;
Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<>();
for (Map.Entry<ServerAndLoad, List<RegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
ServerAndLoad sal = server.getKey();
int load = sal.getLoad();
if (load <= max) {
serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0, server.getValue()));
continue;
}
serversOverloaded++;
List<RegionInfo> regions = server.getValue();
int numToOffload = Math.min(load - max, regions.size());
// account for the out-of-band regions which were assigned to this server
// after some other region server crashed
Collections.sort(regions, riComparator);
int numTaken = 0;
for (int i = 0; i <= numToOffload; ) {
// fetch from head
RegionInfo hri = regions.get(i);
if (fetchFromTail) {
hri = regions.get(regions.size() - 1 - i);
}
i++;
regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
numTaken++;
if (numTaken >= numToOffload) {
break;
}
}
serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, -numTaken, server.getValue()));
}
int totalNumMoved = regionsToMove.size();
// Walk down least loaded, filling each to the min
// number of regions needed to bring all up to min
int neededRegions = 0;
fetchFromTail = false;
Map<ServerName, Integer> underloadedServers = new HashMap<>();
int maxToTake = numRegions - min;
for (Map.Entry<ServerAndLoad, List<RegionInfo>> server : serversByLoad.entrySet()) {
if (maxToTake == 0) {
// no more to take
break;
}
int load = server.getKey().getLoad();
if (load >= min) {
// look for other servers which haven't reached min
continue;
}
int regionsToPut = min - load;
maxToTake -= regionsToPut;
underloadedServers.put(server.getKey().getServerName(), regionsToPut);
}
// number of servers that get new regions
int serversUnderloaded = underloadedServers.size();
int incr = 1;
List<ServerName> sns = Arrays.asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
Collections.shuffle(sns);
while (regionsToMove.size() > 0) {
int cnt = 0;
int i = incr > 0 ? 0 : underloadedServers.size() - 1;
for (; i >= 0 && i < underloadedServers.size(); i += incr) {
if (regionsToMove.isEmpty()) {
break;
}
ServerName si = sns.get(i);
int numToTake = underloadedServers.get(si);
if (numToTake == 0) {
continue;
}
addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
underloadedServers.put(si, numToTake - 1);
cnt++;
BalanceInfo bi = serverBalanceInfo.get(si);
bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1);
}
if (cnt == 0) {
break;
}
// iterates underloadedServers in the other direction
incr = -incr;
}
for (Integer i : underloadedServers.values()) {
// If we still want to take some, increment needed
neededRegions += i;
}
// If we need more to fill min, grab one from each most loaded until enough
if (neededRegions != 0) {
// Walk down most loaded, grabbing one from each until we get enough
for (Map.Entry<ServerAndLoad, List<RegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
if (idx >= server.getValue().size()) {
break;
}
RegionInfo region = server.getValue().get(idx);
if (region.isMetaRegion()) {
// Don't move meta regions.
continue;
}
regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
balanceInfo.setNumRegionsAdded(balanceInfo.getNumRegionsAdded() - 1);
balanceInfo.setNextRegionForUnload(balanceInfo.getNextRegionForUnload() + 1);
totalNumMoved++;
if (--neededRegions == 0) {
// No more regions needed, done shedding
break;
}
}
}
// Walk down least loaded, assigning to each to fill up to min
for (Map.Entry<ServerAndLoad, List<RegionInfo>> server : serversByLoad.entrySet()) {
int regionCount = server.getKey().getLoad();
if (regionCount >= min) {
break;
}
BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
if (balanceInfo != null) {
regionCount += balanceInfo.getNumRegionsAdded();
}
if (regionCount >= min) {
continue;
}
int numToTake = min - regionCount;
int numTaken = 0;
while (numTaken < numToTake && 0 < regionsToMove.size()) {
addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
numTaken++;
balanceInfo.setNumRegionsAdded(balanceInfo.getNumRegionsAdded() + 1);
}
}
if (min != max) {
balanceOverall(regionsToReturn, serverBalanceInfo, fetchFromTail, regionsToMove, max, min);
}
long endTime = EnvironmentEdgeManager.currentTime();
if (!regionsToMove.isEmpty() || neededRegions != 0) {
// Emit data so can diagnose how balancer went astray.
LOG.warn("regionsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded);
StringBuilder sb = new StringBuilder();
for (Map.Entry<ServerName, List<RegionInfo>> e : loadOfOneTable.entrySet()) {
if (sb.length() > 0) {
sb.append(", ");
}
sb.append(e.getKey().toString());
sb.append(" ");
sb.append(e.getValue().size());
}
LOG.warn("Input " + sb.toString());
}
// All done!
LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded + " less loaded servers");
return regionsToReturn;
}
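As a quick illustration of the MIN/MAX invariant described in the Javadoc above (min = floor(average), max = ceiling(average)), here is a tiny standalone example that uses the same arithmetic as the balanceTable code; the region and server counts are made up for illustration.
// Illustration only: with 3 servers and 10 regions the load average is ~3.33,
// so min = 3 and max = 4, and a balanced cluster holds either 3 or 4 regions per server.
public class MinMaxInvariantExample {
public static void main(String[] args) {
int numRegions = 10;
int numServers = 3;
float average = (float) numRegions / numServers;
int max = (int) Math.ceil(average); // 4
int min = (int) average; // 3 (floor for a non-negative average)
System.out.println("average=" + average + ", min=" + min + ", max=" + max);
// Any balanced assignment, e.g. {4, 3, 3}, satisfies min <= load <= max for every server.
}
}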
use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.
the class TestStochasticLoadBalancer method testCPRequestCost.
@Test
public void testCPRequestCost() {
// set a large cpRequestCost in order to pass the needsBalance check
conf.setFloat("hbase.master.balancer.stochastic.cpRequestCost", 10000f);
loadBalancer.onConfigurationChange(conf);
// mock cluster State
Map<ServerName, List<RegionInfo>> clusterState = new HashMap<ServerName, List<RegionInfo>>();
ServerName serverA = randomServer(3).getServerName();
ServerName serverB = randomServer(3).getServerName();
ServerName serverC = randomServer(3).getServerName();
List<RegionInfo> regionsOnServerA = randomRegions(3);
List<RegionInfo> regionsOnServerB = randomRegions(3);
List<RegionInfo> regionsOnServerC = randomRegions(3);
clusterState.put(serverA, regionsOnServerA);
clusterState.put(serverB, regionsOnServerB);
clusterState.put(serverC, regionsOnServerC);
// mock ClusterMetrics
Map<ServerName, ServerMetrics> serverMetricsMap = new TreeMap<>();
serverMetricsMap.put(serverA, mockServerMetricsWithCpRequests(regionsOnServerA, 0));
serverMetricsMap.put(serverB, mockServerMetricsWithCpRequests(regionsOnServerB, 0));
serverMetricsMap.put(serverC, mockServerMetricsWithCpRequests(regionsOnServerC, 0));
ClusterMetrics clusterStatus = mock(ClusterMetrics.class);
when(clusterStatus.getLiveServerMetrics()).thenReturn(serverMetricsMap);
loadBalancer.updateClusterMetrics(clusterStatus);
// CPRequestCostFunction is rate based, so push ClusterMetrics a second time via updateClusterMetrics.
// This time, the regions on serverA carry more cpRequestCount load:
// serverA : 1000,1000,1000
// serverB : 0,0,0
// serverC : 0,0,0
// so two regions should move from serverA to serverB and serverC
serverMetricsMap = new TreeMap<>();
serverMetricsMap.put(serverA, mockServerMetricsWithCpRequests(regionsOnServerA, 1000));
serverMetricsMap.put(serverB, mockServerMetricsWithCpRequests(regionsOnServerB, 0));
serverMetricsMap.put(serverC, mockServerMetricsWithCpRequests(regionsOnServerC, 0));
clusterStatus = mock(ClusterMetrics.class);
when(clusterStatus.getLiveServerMetrics()).thenReturn(serverMetricsMap);
loadBalancer.updateClusterMetrics(clusterStatus);
List<RegionPlan> plans = loadBalancer.balanceTable(HConstants.ENSEMBLE_TABLE_NAME, clusterState);
Set<RegionInfo> regionsMoveFromServerA = new HashSet<>();
Set<ServerName> targetServers = new HashSet<>();
for (RegionPlan plan : plans) {
if (plan.getSource().equals(serverA)) {
regionsMoveFromServerA.add(plan.getRegionInfo());
targetServers.add(plan.getDestination());
}
}
// should move 2 regions from serverA, one moves to serverB, the other moves to serverC
assertEquals(2, regionsMoveFromServerA.size());
assertEquals(2, targetServers.size());
assertTrue(regionsOnServerA.containsAll(regionsMoveFromServerA));
// reset config
conf.setFloat("hbase.master.balancer.stochastic.cpRequestCost", 5f);
loadBalancer.onConfigurationChange(conf);
}
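The comment in the test notes that CPRequestCostFunction is rate based, which is why ClusterMetrics is pushed twice: the cost reacts to the change between successive snapshots rather than to an absolute count. A trivial standalone illustration of that idea follows; it is not the actual cost function implementation.
// Illustration only: a rate-based load value is the delta between two successive metric
// snapshots, so a server only looks "hot" after the second updateClusterMetrics call.
public class RateBasedLoadExample {
public static void main(String[] args) {
long firstSnapshot = 0; // cpRequestCount seen at the first updateClusterMetrics
long secondSnapshot = 1000; // cpRequestCount seen at the second updateClusterMetrics
long rate = secondSnapshot - firstSnapshot; // per-interval coprocessor request rate
System.out.println("cpRequest rate for this region = " + rate);
}
}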
use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.
the class StochasticLoadBalancer method createRegionPlans.
/**
 * Create all of the RegionPlans needed to move from the initial cluster state to the desired
* state.
*
* @param cluster The state of the cluster
 * @return List of RegionPlans that represent the moves needed to get to the desired final state.
*/
private List<RegionPlan> createRegionPlans(BalancerClusterState cluster) {
List<RegionPlan> plans = new ArrayList<>();
for (int regionIndex = 0; regionIndex < cluster.regionIndexToServerIndex.length; regionIndex++) {
int initialServerIndex = cluster.initialRegionIndexToServerIndex[regionIndex];
int newServerIndex = cluster.regionIndexToServerIndex[regionIndex];
if (initialServerIndex != newServerIndex) {
RegionInfo region = cluster.regions[regionIndex];
ServerName initialServer = cluster.servers[initialServerIndex];
ServerName newServer = cluster.servers[newServerIndex];
if (LOG.isTraceEnabled()) {
LOG.trace("Moving Region " + region.getEncodedName() + " from server " + initialServer.getHostname() + " to " + newServer.getHostname());
}
RegionPlan rp = new RegionPlan(region, initialServer, newServer);
plans.add(rp);
}
}
return plans;
}
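A hypothetical consumer of the returned plans might summarize or log them before they are executed. The sketch below is not from the HBase source; it only uses the RegionPlan accessors (getRegionInfo, getSource, getDestination) that already appear in the snippets above.
// Sketch of a plan summary helper, e.g. for logging or a dry-run report.
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.master.RegionPlan;
public class RegionPlanSummary {
/** Counts how many regions each destination server would receive. */
public static Map<ServerName, Integer> movesPerDestination(List<RegionPlan> plans) {
Map<ServerName, Integer> counts = new HashMap<>();
for (RegionPlan plan : plans) {
System.out.println("move " + plan.getRegionInfo().getEncodedName()
+ " from " + plan.getSource() + " to " + plan.getDestination());
counts.merge(plan.getDestination(), 1, Integer::sum);
}
return counts;
}
}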