use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.
the class TestReportRegionStateTransitionRetry method testRetryOnClose.
@Test
public void testRetryOnClose() throws Exception {
  RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
  ProcedureExecutor<MasterProcedureEnv> procExec =
    UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
  AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
  RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
  CountDownLatch latch = new CountDownLatch(1);
  RESUME_AND_FAIL.set(latch);
  Future<byte[]> future =
    am.moveAsync(new RegionPlan(region, rsn.getRegionLocation(), rsn.getRegionLocation()));
  TransitRegionStateProcedure proc = procExec.getProcedures().stream()
    .filter(p -> p instanceof TransitRegionStateProcedure)
    .filter(p -> !p.isFinished())
    .map(p -> (TransitRegionStateProcedure) p)
    .findAny().get();
  // wait until we schedule the OpenRegionProcedure
  UTIL.waitFor(10000, () -> proc.getCurrentStateId() == REGION_STATE_TRANSITION_CONFIRM_OPENED_VALUE);
  // Fail the reportRegionStateTransition for closing
  latch.countDown();
  future.get();
  // confirm that the region can still be written
  try (Table table = UTIL.getConnection().getTableBuilder(NAME, null)
    .setWriteRpcTimeout(1000).setOperationTimeout(2000).build()) {
    table.put(new Put(Bytes.toBytes("key")).addColumn(CF, Bytes.toBytes("cq"), Bytes.toBytes("val")));
  }
}
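The test above drives the move through AssignmentManager.moveAsync with a RegionPlan whose source and destination are the same server, which forces the region through a close/reopen cycle in place. Below is a minimal sketch of that same pattern; the import paths for AssignmentManager and RegionStateNode are assumed from the HBase 2.x module layout and are not confirmed by the snippet itself.

import java.util.concurrent.Future;

import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.RegionPlan;
// Assumed HBase 2.x package locations; adjust for your version.
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;

// Hedged sketch: force a close/reopen of a region on its current server,
// mirroring what the test above does via moveAsync.
final class MoveInPlaceSketch {
  static void moveInPlace(AssignmentManager am, RegionInfo region) throws Exception {
    RegionStateNode node = am.getRegionStates().getRegionStateNode(region);
    ServerName current = node.getRegionLocation();
    // Source == destination: the region is closed and reopened on the same server.
    Future<byte[]> move = am.moveAsync(new RegionPlan(region, current, current));
    // Wait for the underlying region state transition procedure to complete.
    move.get();
  }
}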
use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.
the class RSGroupInfoManagerImpl method moveRegionsBetweenGroups.
private <T> void moveRegionsBetweenGroups(Set<T> regionsOwners, Set<Address> newRegionsOwners,
    String targetGroupName, String sourceGroupName, Function<T, List<RegionInfo>> getRegionsInfo,
    Function<RegionInfo, Boolean> validation) throws IOException {
  // Get server names corresponding to given Addresses
  List<ServerName> movedServerNames = new ArrayList<>(regionsOwners.size());
  List<ServerName> srcGrpServerNames = new ArrayList<>(newRegionsOwners.size());
  for (ServerName serverName : masterServices.getServerManager().getOnlineServers().keySet()) {
    // the two owner sets can contain the same servers, so check both conditions for every server
    if (newRegionsOwners.contains(serverName.getAddress())) {
      srcGrpServerNames.add(serverName);
    }
    if (regionsOwners.contains(serverName.getAddress())) {
      movedServerNames.add(serverName);
    }
  }
  List<Pair<RegionInfo, Future<byte[]>>> assignmentFutures = new ArrayList<>();
  int retry = 0;
  Set<String> failedRegions = new HashSet<>();
  IOException toThrow = null;
  do {
    assignmentFutures.clear();
    failedRegions.clear();
    for (ServerName owner : movedServerNames) {
      // Get regions that are associated with this server and filter regions by group tables.
      for (RegionInfo region : getRegionsInfo.apply((T) owner.getAddress())) {
        if (!validation.apply(region)) {
          LOG.info("Moving region {}, which does not belong to RSGroup {}",
            region.getShortNameToLog(), targetGroupName);
          // Move region back to source RSGroup servers
          ServerName dest =
            masterServices.getLoadBalancer().randomAssignment(region, srcGrpServerNames);
          if (dest == null) {
            failedRegions.add(region.getRegionNameAsString());
            continue;
          }
          RegionPlan rp = new RegionPlan(region, owner, dest);
          try {
            Future<byte[]> future = masterServices.getAssignmentManager().moveAsync(rp);
            assignmentFutures.add(Pair.newPair(region, future));
          } catch (IOException ioe) {
            failedRegions.add(region.getRegionNameAsString());
            LOG.debug("Move region {} failed, will retry, current retry time is {}",
              region.getShortNameToLog(), retry, ioe);
            toThrow = ioe;
          }
        }
      }
    }
    waitForRegionMovement(assignmentFutures, failedRegions, sourceGroupName, retry);
    if (failedRegions.isEmpty()) {
      LOG.info("All regions from {} are moved back to {}", movedServerNames, sourceGroupName);
      return;
    } else {
      try {
        wait(1000);
      } catch (InterruptedException e) {
        LOG.warn("Sleep interrupted", e);
        Thread.currentThread().interrupt();
      }
      retry++;
    }
  } while (!failedRegions.isEmpty()
    && retry <= masterServices.getConfiguration().getInt(FAILED_MOVE_MAX_RETRY, DEFAULT_MAX_RETRY_VALUE));
  // we reached the max retry count, or there are no more regions to move
  if (!failedRegions.isEmpty()) {
    // log the regions that failed to move so they can be followed up on later
    String msg = String.format("move regions for group %s failed, failed regions: %s",
      sourceGroupName, failedRegions);
    LOG.error(msg);
    throw new DoNotRetryIOException(
      msg + ", just record the last failed region's cause, more details in server log", toThrow);
  }
}
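Stripped of the RSGroup specifics, the method above is a bounded retry loop: attempt every move, collect the failures, back off, and retry until the budget (FAILED_MOVE_MAX_RETRY in the real code) is exhausted. Here is a hedged sketch of just that skeleton, where tryMoveAll is a hypothetical stand-in for the moveAsync/waitForRegionMovement logic shown above.

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

// Sketch of the bounded retry skeleton used by moveRegionsBetweenGroups:
// keep retrying the regions that failed until they all succeed or the
// retry budget is exhausted.
final class BoundedRetrySketch {
  static void moveWithRetries(Set<String> regions, int maxRetry) throws IOException {
    Set<String> failed = new HashSet<>(regions);
    int retry = 0;
    do {
      failed = tryMoveAll(failed);   // returns the regions that still failed
      if (failed.isEmpty()) {
        return;                      // everything moved, done
      }
      try {
        Thread.sleep(1000);          // back off before the next attempt
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        break;
      }
      retry++;
    } while (!failed.isEmpty() && retry <= maxRetry);
    throw new IOException("regions still not moved after " + retry + " retries: " + failed);
  }

  // Hypothetical helper: attempt each move and report the ones that failed.
  private static Set<String> tryMoveAll(Set<String> regions) {
    return new HashSet<>(); // placeholder for the real move + wait logic
  }
}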
use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.
the class SimpleLoadBalancer method balanceCluster.
/**
* Generate a global load balancing plan according to the specified map of
* server information to the most loaded regions of each server.
*
* The load balancing invariant is that all servers are within 1 region of the
* average number of regions per server. If the average is an integer number,
* all servers will be balanced to the average. Otherwise, all servers will
* have either floor(average) or ceiling(average) regions.
*
* HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that
* we can fetch from both ends of the queue.
* At the beginning, we check whether an empty region server was just
* discovered by the Master. If so, we alternately choose new / old
* regions from the head / tail of regionsToMove, respectively. This alternation
* avoids clustering young regions on the newly discovered region server.
* Otherwise, we choose new regions from head of regionsToMove.
*
* Another improvement from HBASE-3609 is that we assign regions from
* regionsToMove to underloaded servers in round-robin fashion.
* Previously one underloaded server would be filled before we move onto
* the next underloaded server, leading to clustering of young regions.
*
* Finally, we randomly shuffle underloaded servers so that they receive
* offloaded regions relatively evenly across calls to balanceCluster().
*
* The algorithm is currently implemented as such:
*
* <ol>
* <li>Determine the two valid numbers of regions each server should have,
* <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
*
* <li>Iterate down the most loaded servers, shedding regions from each so
* each server hosts exactly <b>MAX</b> regions. Stop once you reach a
* server that already has <= <b>MAX</b> regions.
* <p>
* Order the regions to move from most recent to least.
*
* <li>Iterate down the least loaded servers, assigning regions so each server
* has exactly <b>MIN</b> regions. Stop once you reach a server that
* already has >= <b>MIN</b> regions.
*
* Regions being assigned to underloaded servers are those that were shed
* in the previous step. It is possible that there were not enough
* regions shed to fill each underloaded server to <b>MIN</b>. If so we
* end up with a number of regions required to do so, <b>neededRegions</b>.
*
* It is also possible that we were able to fill each underloaded server but
* ended up with regions that were unassigned from overloaded servers and
* that still do not have an assignment.
*
* If neither of these conditions hold (no regions needed to fill the
* underloaded servers, no regions leftover from overloaded servers),
* we are done and return. Otherwise we handle these cases below.
*
* <li>If <b>neededRegions</b> is non-zero (still have underloaded servers),
* we iterate the most loaded servers again, shedding a single server from
* each (this brings them from having <b>MAX</b> regions to having
* <b>MIN</b> regions).
*
* <li>We now definitely have more regions that need assignment, either from
* the previous step or from the original shedding from overloaded servers.
* Iterate the least loaded servers filling each to <b>MIN</b>.
*
* <li>If we still have more regions that need assignment, again iterate the
* least loaded servers, this time giving each one (filling them to
* <b>MAX</b>) until we run out.
*
* <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions.
*
* In addition, any server hosting >= <b>MAX</b> regions is guaranteed
* to end up with <b>MAX</b> regions at the end of the balancing. This
* ensures the minimal number of regions possible are moved.
* </ol>
*
* TODO: We can at-most reassign the number of regions away from a particular
* server to be how many they report as most loaded.
* Should we just keep all assignment in memory? Any objections?
* Does this mean we need HeapSize on HMaster? Or just careful monitor?
* (current thinking is we will hold all assignments in memory)
*
* @param clusterMap Map of regionservers and their load/region information to
* a list of their most loaded regions
* @return a list of regions to be moved, including source and destination,
* or null if cluster is already balanced
*/
@Override
public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterMap) {
  List<RegionPlan> regionsToReturn = balanceMasterRegions(clusterMap);
  if (regionsToReturn != null || clusterMap == null || clusterMap.size() <= 1) {
    return regionsToReturn;
  }
  if (masterServerName != null && clusterMap.containsKey(masterServerName)) {
    if (clusterMap.size() <= 2) {
      return null;
    }
    clusterMap = new HashMap<>(clusterMap);
    clusterMap.remove(masterServerName);
  }
  long startTime = System.currentTimeMillis();
  // construct a Cluster object with clusterMap and the rest of the arguments as defaults
  Cluster c = new Cluster(clusterMap, null, this.regionFinder, this.rackManager);
  if (!this.needsBalance(c) && !this.overallNeedsBalance()) {
    return null;
  }
  ClusterLoadState cs = new ClusterLoadState(clusterMap);
  int numServers = cs.getNumServers();
  NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
  int numRegions = cs.getNumRegions();
  float average = cs.getLoadAverage();
  int max = (int) Math.ceil(average);
  int min = (int) average;
  // Log the balance parameters so the balance result can be checked later.
  StringBuilder strBalanceParam = new StringBuilder();
  strBalanceParam.append("Balance parameter: numRegions=").append(numRegions)
    .append(", numServers=").append(numServers).append(", max=").append(max)
    .append(", min=").append(min);
  LOG.debug(strBalanceParam.toString());
  // Balance the cluster
  // TODO: Look at data block locality or a more complex load to do this
  MinMaxPriorityQueue<RegionPlan> regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create();
  regionsToReturn = new ArrayList<>();
  // Walk down most loaded, pruning each to the max
  int serversOverloaded = 0;
  // flag used to fetch regions from head and tail of list, alternately
  boolean fetchFromTail = false;
  Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<>();
  for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
    ServerAndLoad sal = server.getKey();
    int load = sal.getLoad();
    if (load <= max) {
      serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0, server.getValue()));
      continue;
    }
    serversOverloaded++;
    List<HRegionInfo> regions = server.getValue();
    int numToOffload = Math.min(load - max, regions.size());
    // account for the out-of-band regions which were assigned to this server
    // after some other region server crashed
    Collections.sort(regions, riComparator);
    int numTaken = 0;
    for (int i = 0; i <= numToOffload; ) {
      // fetch from head
      HRegionInfo hri = regions.get(i);
      if (fetchFromTail) {
        hri = regions.get(regions.size() - 1 - i);
      }
      i++;
      // Don't rebalance special regions.
      if (shouldBeOnMaster(hri) && masterServerName.equals(sal.getServerName())) {
        continue;
      }
      regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
      numTaken++;
      if (numTaken >= numToOffload) {
        break;
      }
    }
    serverBalanceInfo.put(sal.getServerName(),
      new BalanceInfo(numToOffload, (-1) * numTaken, server.getValue()));
  }
  int totalNumMoved = regionsToMove.size();
  // Walk down least loaded, filling each to the min
  // number of regions needed to bring all up to min
  int neededRegions = 0;
  fetchFromTail = false;
  Map<ServerName, Integer> underloadedServers = new HashMap<>();
  int maxToTake = numRegions - min;
  for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
    // no more to take
    if (maxToTake == 0) {
      break;
    }
    int load = server.getKey().getLoad();
    if (load >= min) {
      // look for other servers which haven't reached min
      continue;
    }
    int regionsToPut = min - load;
    maxToTake -= regionsToPut;
    underloadedServers.put(server.getKey().getServerName(), regionsToPut);
  }
  // number of servers that get new regions
  int serversUnderloaded = underloadedServers.size();
  int incr = 1;
  List<ServerName> sns =
    Arrays.asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
  Collections.shuffle(sns, RANDOM);
  while (regionsToMove.size() > 0) {
    int cnt = 0;
    int i = incr > 0 ? 0 : underloadedServers.size() - 1;
    for (; i >= 0 && i < underloadedServers.size(); i += incr) {
      if (regionsToMove.isEmpty()) {
        break;
      }
      ServerName si = sns.get(i);
      int numToTake = underloadedServers.get(si);
      if (numToTake == 0) {
        continue;
      }
      addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
      underloadedServers.put(si, numToTake - 1);
      cnt++;
      BalanceInfo bi = serverBalanceInfo.get(si);
      bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1);
    }
    if (cnt == 0) {
      break;
    }
    // iterates underloadedServers in the other direction
    incr = -incr;
  }
  for (Integer i : underloadedServers.values()) {
    // If we still want to take some, increment needed
    neededRegions += i;
  }
  // If we need more to fill min, grab one from each most loaded until enough
  if (neededRegions != 0) {
    // Walk down most loaded, grabbing one from each until we get enough
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
      BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
      int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
      if (idx >= server.getValue().size()) {
        break;
      }
      HRegionInfo region = server.getValue().get(idx);
      // Don't move meta regions.
      if (region.isMetaRegion()) {
        continue;
      }
      regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
      balanceInfo.setNumRegionsAdded(balanceInfo.getNumRegionsAdded() - 1);
      balanceInfo.setNextRegionForUnload(balanceInfo.getNextRegionForUnload() + 1);
      totalNumMoved++;
      if (--neededRegions == 0) {
        // No more regions needed, done shedding
        break;
      }
    }
  }
  // Walk down least loaded, assigning to each to fill up to min
  for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
    int regionCount = server.getKey().getLoad();
    if (regionCount >= min) {
      break;
    }
    BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
    if (balanceInfo != null) {
      regionCount += balanceInfo.getNumRegionsAdded();
    }
    if (regionCount >= min) {
      continue;
    }
    int numToTake = min - regionCount;
    int numTaken = 0;
    while (numTaken < numToTake && 0 < regionsToMove.size()) {
      addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
      numTaken++;
    }
  }
  if (min != max) {
    balanceOverall(regionsToReturn, serverBalanceInfo, fetchFromTail, regionsToMove, max, min);
  }
  long endTime = System.currentTimeMillis();
  if (!regionsToMove.isEmpty() || neededRegions != 0) {
    // Emit data so we can diagnose how the balancer went astray.
    LOG.warn("regionsToMove=" + totalNumMoved + ", numServers=" + numServers
      + ", serversOverloaded=" + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded);
    StringBuilder sb = new StringBuilder();
    for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterMap.entrySet()) {
      if (sb.length() > 0) {
        sb.append(", ");
      }
      sb.append(e.getKey().toString());
      sb.append(" ");
      sb.append(e.getValue().size());
    }
    LOG.warn("Input " + sb.toString());
  }
  // All done!
  LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. "
    + "Moving " + totalNumMoved + " regions off of " + serversOverloaded
    + " overloaded servers onto " + serversUnderloaded + " less loaded servers");
  return regionsToReturn;
}
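As a concrete illustration of the MIN/MAX invariant described in the javadoc (the numbers below are made up for the example, not taken from the source): with 26 regions on 5 servers the average load is 5.2, so min = floor(5.2) = 5 and max = ceil(5.2) = 6, and after balancing every server hosts either 5 or 6 regions.

// Worked example (hypothetical numbers) of the min/max computation in balanceCluster.
public class MinMaxExample {
  public static void main(String[] args) {
    int numRegions = 26;   // hypothetical cluster-wide region count
    int numServers = 5;
    float average = (float) numRegions / numServers; // 5.2
    int max = (int) Math.ceil(average);              // 6: ceiling(average)
    int min = (int) average;                         // 5: floor(average)
    // After balancing, every server hosts either min or max regions,
    // e.g. four servers with 5 regions and one with 6 (4 * 5 + 6 = 26).
    System.out.println("min=" + min + ", max=" + max);
  }
}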
use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.
the class StochasticLoadBalancer method createRegionPlans.
/**
* Create all of the RegionPlans needed to move from the initial cluster state to the desired
* state.
*
* @param cluster The state of the cluster
* @return List of RegionPlans that represent the moves needed to get to the desired final state.
*/
private List<RegionPlan> createRegionPlans(Cluster cluster) {
  List<RegionPlan> plans = new LinkedList<>();
  for (int regionIndex = 0; regionIndex < cluster.regionIndexToServerIndex.length; regionIndex++) {
    int initialServerIndex = cluster.initialRegionIndexToServerIndex[regionIndex];
    int newServerIndex = cluster.regionIndexToServerIndex[regionIndex];
    if (initialServerIndex != newServerIndex) {
      HRegionInfo region = cluster.regions[regionIndex];
      ServerName initialServer = cluster.servers[initialServerIndex];
      ServerName newServer = cluster.servers[newServerIndex];
      if (LOG.isTraceEnabled()) {
        LOG.trace("Moving Region " + region.getEncodedName() + " from server "
          + initialServer.getHostname() + " to " + newServer.getHostname());
      }
      RegionPlan rp = new RegionPlan(region, initialServer, newServer);
      plans.add(rp);
    }
  }
  return plans;
}
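A hedged sketch of how a caller might inspect the returned plans, assuming RegionPlan's getRegionInfo(), getSource() and getDestination() accessors:

import java.util.List;

import org.apache.hadoop.hbase.master.RegionPlan;

// Sketch: log each computed move (region, source server, destination server).
final class PlanLoggingSketch {
  static void logPlans(List<RegionPlan> plans) {
    for (RegionPlan plan : plans) {
      System.out.println("move " + plan.getRegionInfo().getEncodedName()
        + " from " + plan.getSource() + " to " + plan.getDestination());
    }
  }
}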
use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.
the class StochasticLoadBalancer method balanceCluster.
/**
* Given the cluster state this will try and approach an optimal balance. This
* should always approach the optimal state given enough steps.
*/
@Override
public synchronized List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
  List<RegionPlan> plans = balanceMasterRegions(clusterState);
  if (plans != null || clusterState == null || clusterState.size() <= 1) {
    return plans;
  }
  if (masterServerName != null && clusterState.containsKey(masterServerName)) {
    if (clusterState.size() <= 2) {
      return null;
    }
    clusterState = new HashMap<>(clusterState);
    clusterState.remove(masterServerName);
  }
  // On clusters with lots of HFileLinks or lots of reference files,
  // instantiating the storefile infos can be quite expensive.
  // Allow turning this feature off if the locality cost is not going to
  // be used in any computations.
  RegionLocationFinder finder = null;
  if (this.localityCost != null && this.localityCost.getMultiplier() > 0) {
    finder = this.regionFinder;
  }
  // The clusterState that is given to this method contains the state
  // of all the regions in the table(s) (that's true today)
  // Keep track of servers to iterate through them.
  Cluster cluster = new Cluster(clusterState, loads, finder, rackManager);
  long startTime = EnvironmentEdgeManager.currentTime();
  initCosts(cluster);
  if (!needsBalance(cluster)) {
    return null;
  }
  double currentCost = computeCost(cluster, Double.MAX_VALUE);
  curOverallCost = currentCost;
  for (int i = 0; i < this.curFunctionCosts.length; i++) {
    curFunctionCosts[i] = tempFunctionCosts[i];
  }
  LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost
    + ", functionCost=" + functionCost());
  double initCost = currentCost;
  double newCost = currentCost;
  long computedMaxSteps = Math.min(this.maxSteps,
    ((long) cluster.numRegions * (long) this.stepsPerRegion * (long) cluster.numServers));
  // Perform a stochastic walk to see if we can get a good fit.
  long step;
  for (step = 0; step < computedMaxSteps; step++) {
    int generatorIdx = RANDOM.nextInt(candidateGenerators.length);
    CandidateGenerator p = candidateGenerators[generatorIdx];
    Cluster.Action action = p.generate(cluster);
    if (action.type == Type.NULL) {
      continue;
    }
    cluster.doAction(action);
    updateCostsWithAction(cluster, action);
    newCost = computeCost(cluster, currentCost);
    // Should this be kept?
    if (newCost < currentCost) {
      currentCost = newCost;
      // save for JMX
      curOverallCost = currentCost;
      for (int i = 0; i < this.curFunctionCosts.length; i++) {
        curFunctionCosts[i] = tempFunctionCosts[i];
      }
    } else {
      // Put things back the way they were before.
      // TODO: undo by remembering old values
      Action undoAction = action.undoAction();
      cluster.doAction(undoAction);
      updateCostsWithAction(cluster, undoAction);
    }
    if (EnvironmentEdgeManager.currentTime() - startTime > maxRunningTime) {
      break;
    }
  }
  long endTime = EnvironmentEdgeManager.currentTime();
  metricsBalancer.balanceCluster(endTime - startTime);
  // update costs metrics
  updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
  if (initCost > currentCost) {
    plans = createRegionPlans(cluster);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Finished computing new load balance plan. Computation took "
        + (endTime - startTime) + "ms to try " + step + " different iterations. Found a solution"
        + " that moves " + plans.size() + " regions; Going from a computed cost of " + initCost
        + " to a new cost of " + currentCost);
    }
    return plans;
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Could not find a better load balance plan. Tried " + step
      + " different configurations in " + (endTime - startTime)
      + "ms, and did not find anything with a computed cost less than " + initCost);
  }
  return null;
}
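The heart of the method above is a greedy accept/undo walk: propose a random action, apply it, recompute the cost, and keep the change only if the cost dropped, otherwise undo it. Below is a generic sketch of that loop shape with hypothetical stand-in types (not the HBase Cluster/Action classes).

import java.util.function.DoubleSupplier;
import java.util.function.Supplier;

// Generic shape of the stochastic walk in balanceCluster: propose, apply,
// keep if the cost improved, otherwise undo.
final class HillClimbSketch {
  interface Move {
    void apply();
    void undo();
  }

  static double climb(Supplier<Move> propose, DoubleSupplier cost, long maxSteps) {
    double current = cost.getAsDouble();
    for (long step = 0; step < maxSteps; step++) {
      Move move = propose.get();
      move.apply();
      double candidate = cost.getAsDouble();
      if (candidate < current) {
        current = candidate;   // keep the improvement
      } else {
        move.undo();           // revert and try another move
      }
    }
    return current;
  }
}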