
Example 16 with RegionPlan

Use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.

From the class TestReportRegionStateTransitionRetry, method testRetryOnClose.

@Test
public void testRetryOnClose() throws Exception {
    RegionInfo region = UTIL.getMiniHBaseCluster().getRegions(NAME).get(0).getRegionInfo();
    ProcedureExecutor<MasterProcedureEnv> procExec = UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
    CountDownLatch latch = new CountDownLatch(1);
    RESUME_AND_FAIL.set(latch);
    Future<byte[]> future = am.moveAsync(new RegionPlan(region, rsn.getRegionLocation(), rsn.getRegionLocation()));
    TransitRegionStateProcedure proc = procExec.getProcedures().stream().filter(p -> p instanceof TransitRegionStateProcedure).filter(p -> !p.isFinished()).map(p -> (TransitRegionStateProcedure) p).findAny().get();
    // wait until we schedule the OpenRegionProcedure
    UTIL.waitFor(10000, () -> proc.getCurrentStateId() == REGION_STATE_TRANSITION_CONFIRM_OPENED_VALUE);
    // Fail the reportRegionStateTransition for closing
    latch.countDown();
    future.get();
    // confirm that the region can still be written to
    try (Table table = UTIL.getConnection().getTableBuilder(NAME, null).setWriteRpcTimeout(1000).setOperationTimeout(2000).build()) {
        table.put(new Put(Bytes.toBytes("key")).addColumn(CF, Bytes.toBytes("cq"), Bytes.toBytes("val")));
    }
}
Also used : BeforeClass(org.junit.BeforeClass) REGION_STATE_TRANSITION_CONFIRM_OPENED_VALUE(org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED_VALUE) ReportRegionStateTransitionResponse(org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse) AtomicReference(java.util.concurrent.atomic.AtomicReference) Future(java.util.concurrent.Future) HConstants(org.apache.hadoop.hbase.HConstants) ProcedureExecutor(org.apache.hadoop.hbase.procedure2.ProcedureExecutor) PleaseHoldException(org.apache.hadoop.hbase.PleaseHoldException) Configuration(org.apache.hadoop.conf.Configuration) MasterServices(org.apache.hadoop.hbase.master.MasterServices) ClassRule(org.junit.ClassRule) Bytes(org.apache.hadoop.hbase.util.Bytes) TableName(org.apache.hadoop.hbase.TableName) MasterRegion(org.apache.hadoop.hbase.master.region.MasterRegion) AfterClass(org.junit.AfterClass) HBaseTestingUtil(org.apache.hadoop.hbase.HBaseTestingUtil) MediumTests(org.apache.hadoop.hbase.testclassification.MediumTests) Put(org.apache.hadoop.hbase.client.Put) HBaseClassTestRule(org.apache.hadoop.hbase.HBaseClassTestRule) IOException(java.io.IOException) Test(org.junit.Test) MasterProcedureEnv(org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv) Category(org.junit.experimental.categories.Category) ReportRegionStateTransitionRequest(org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest) CountDownLatch(java.util.concurrent.CountDownLatch) RegionPlan(org.apache.hadoop.hbase.master.RegionPlan) MasterTests(org.apache.hadoop.hbase.testclassification.MasterTests) Table(org.apache.hadoop.hbase.client.Table) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) HMaster(org.apache.hadoop.hbase.master.HMaster)
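
The trick in this test is the RegionPlan whose source and destination are the same server: moveAsync then closes and reopens the region in place, which is what lets the test exercise the retry on close. A minimal sketch of that pattern, assuming a live master's AssignmentManager (the helper class and method names here are hypothetical):

// Hypothetical helper distilled from the test above: reopen a region on the
// server that currently hosts it by making source and destination identical.
import java.io.IOException;
import java.util.concurrent.Future;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;

public final class ReopenInPlace {

    static Future<byte[]> reopen(AssignmentManager am, RegionInfo region,
            ServerName currentLocation) throws IOException {
        // RegionPlan(region, source, destination): using the same server on
        // both sides turns the "move" into a close followed by an open there.
        RegionPlan plan = new RegionPlan(region, currentLocation, currentLocation);
        // The returned future completes once the transit procedure finishes.
        return am.moveAsync(plan);
    }
}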

Example 17 with RegionPlan

Use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.

From the class RSGroupInfoManagerImpl, method moveRegionsBetweenGroups.

private <T> void moveRegionsBetweenGroups(Set<T> regionsOwners, Set<Address> newRegionsOwners, String targetGroupName, String sourceGroupName, Function<T, List<RegionInfo>> getRegionsInfo, Function<RegionInfo, Boolean> validation) throws IOException {
    // Get server names corresponding to given Addresses
    List<ServerName> movedServerNames = new ArrayList<>(regionsOwners.size());
    List<ServerName> srcGrpServerNames = new ArrayList<>(newRegionsOwners.size());
    for (ServerName serverName : masterServices.getServerManager().getOnlineServers().keySet()) {
        // The source and target groups can contain the same servers, so both conditions below are checked for every server
        if (newRegionsOwners.contains(serverName.getAddress())) {
            srcGrpServerNames.add(serverName);
        }
        if (regionsOwners.contains(serverName.getAddress())) {
            movedServerNames.add(serverName);
        }
    }
    List<Pair<RegionInfo, Future<byte[]>>> assignmentFutures = new ArrayList<>();
    int retry = 0;
    Set<String> failedRegions = new HashSet<>();
    IOException toThrow = null;
    do {
        assignmentFutures.clear();
        failedRegions.clear();
        for (ServerName owner : movedServerNames) {
            // Get regions that are associated with this server and filter regions by group tables.
            for (RegionInfo region : getRegionsInfo.apply((T) owner.getAddress())) {
                if (!validation.apply(region)) {
                    LOG.info("Moving region {}, which does not belong to RSGroup {}", region.getShortNameToLog(), targetGroupName);
                    // Move region back to source RSGroup servers
                    ServerName dest = masterServices.getLoadBalancer().randomAssignment(region, srcGrpServerNames);
                    if (dest == null) {
                        failedRegions.add(region.getRegionNameAsString());
                        continue;
                    }
                    RegionPlan rp = new RegionPlan(region, owner, dest);
                    try {
                        Future<byte[]> future = masterServices.getAssignmentManager().moveAsync(rp);
                        assignmentFutures.add(Pair.newPair(region, future));
                    } catch (IOException ioe) {
                        failedRegions.add(region.getRegionNameAsString());
                        LOG.debug("Move region {} failed, will retry, current retry time is {}", region.getShortNameToLog(), retry, ioe);
                        toThrow = ioe;
                    }
                }
            }
        }
        waitForRegionMovement(assignmentFutures, failedRegions, sourceGroupName, retry);
        if (failedRegions.isEmpty()) {
            LOG.info("All regions from {} are moved back to {}", movedServerNames, sourceGroupName);
            return;
        } else {
            try {
                wait(1000);
            } catch (InterruptedException e) {
                LOG.warn("Sleep interrupted", e);
                Thread.currentThread().interrupt();
            }
            retry++;
        }
    } while (!failedRegions.isEmpty() && retry <= masterServices.getConfiguration().getInt(FAILED_MOVE_MAX_RETRY, DEFAULT_MAX_RETRY_VALUE));
    // Either we have exhausted the maximum number of retries, or there are no more regions to move
    if (!failedRegions.isEmpty()) {
        // log the regions that failed to move, to make later follow-up easier
        String msg = String.format("move regions for group %s failed, failed regions: %s", sourceGroupName, failedRegions);
        LOG.error(msg);
        throw new DoNotRetryIOException(msg + ", just record the last failed region's cause, more details in server log", toThrow);
    }
}
Also used : DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) ArrayList(java.util.ArrayList) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) IOException(java.io.IOException) MultiRowMutationEndpoint(org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint) RegionPlan(org.apache.hadoop.hbase.master.RegionPlan) ServerName(org.apache.hadoop.hbase.ServerName) Pair(org.apache.hadoop.hbase.util.Pair) HashSet(java.util.HashSet)
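
Stripped of the HBase specifics, the method is a bounded-retry loop: attempt every move, collect the failures, sleep, and try the failures again until the set is empty or the retry limit is hit. A framework-free sketch of that pattern (all names here are illustrative, not HBase API):

import java.util.HashSet;
import java.util.Set;

public final class BoundedRetry {

    // A unit of work that reports success or failure per attempt.
    interface Move {
        boolean attempt();
    }

    // Returns the moves that still failed after all rounds; an empty set
    // means everything eventually succeeded.
    static Set<Move> retryAll(Set<Move> moves, int maxRetries, long backoffMs)
            throws InterruptedException {
        Set<Move> failed = new HashSet<>(moves);
        for (int retry = 0; retry <= maxRetries && !failed.isEmpty(); retry++) {
            Set<Move> stillFailing = new HashSet<>();
            for (Move m : failed) {
                if (!m.attempt()) {
                    stillFailing.add(m); // carry over into the next round
                }
            }
            failed = stillFailing;
            if (!failed.isEmpty() && retry < maxRetries) {
                Thread.sleep(backoffMs); // the method above waits 1000 ms per round
            }
        }
        return failed; // non-empty: the caller should raise an error, as above
    }
}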

Example 18 with RegionPlan

Use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.

From the class SimpleLoadBalancer, method balanceCluster.

/**
   * Generate a global load balancing plan according to the specified map of
   * server information to the most loaded regions of each server.
   *
   * The load balancing invariant is that all servers are within 1 region of the
   * average number of regions per server.  If the average is an integer number,
   * all servers will be balanced to the average.  Otherwise, all servers will
   * have either floor(average) or ceiling(average) regions.
   *
   * HBASE-3609 Modeled regionsToMove using Guava's MinMaxPriorityQueue so that
   *   we can fetch from both ends of the queue.
   * At the beginning, we check whether there was empty region server
   *   just discovered by Master. If so, we alternately choose new / old
   *   regions from head / tail of regionsToMove, respectively. This alternation
   *   avoids clustering young regions on the newly discovered region server.
   *   Otherwise, we choose new regions from head of regionsToMove.
   *
   * Another improvement from HBASE-3609 is that we assign regions from
   *   regionsToMove to underloaded servers in round-robin fashion.
   *   Previously one underloaded server would be filled before we move onto
   *   the next underloaded server, leading to clustering of young regions.
   *
   * Finally, we randomly shuffle underloaded servers so that they receive
   *   offloaded regions relatively evenly across calls to balanceCluster().
   *
   * The algorithm is currently implemented as such:
   *
   * <ol>
   * <li>Determine the two valid numbers of regions each server should have,
   *     <b>MIN</b>=floor(average) and <b>MAX</b>=ceiling(average).
   *
   * <li>Iterate down the most loaded servers, shedding regions from each so
   *     each server hosts exactly <b>MAX</b> regions.  Stop once you reach a
   *     server that already has &lt;= <b>MAX</b> regions.
   *     <p>
   *     Order the regions to move from most recent to least.
   *
   * <li>Iterate down the least loaded servers, assigning regions so each server
   *     has exactly <b>MIN</b> regions.  Stop once you reach a server that
   *     already has &gt;= <b>MIN</b> regions.
   *
   *     Regions being assigned to underloaded servers are those that were shed
   *     in the previous step.  It is possible that there were not enough
   *     regions shed to fill each underloaded server to <b>MIN</b>.  If so we
   *     end up with a number of regions required to do so, <b>neededRegions</b>.
   *
   *     It is also possible that we were able to fill each underloaded server but ended
   *     up with regions that were unassigned from overloaded servers but that
   *     still do not have assignment.
   *
   *     If neither of these conditions hold (no regions needed to fill the
   *     underloaded servers, no regions leftover from overloaded servers),
   *     we are done and return.  Otherwise we handle these cases below.
   *
   * <li>If <b>neededRegions</b> is non-zero (still have underloaded servers),
   *     we iterate the most loaded servers again, shedding a single region from
   *     each (this brings them from having <b>MAX</b> regions to having
   *     <b>MIN</b> regions).
   *
   * <li>We now definitely have more regions that need assignment, either from
   *     the previous step or from the original shedding from overloaded servers.
   *     Iterate the least loaded servers filling each to <b>MIN</b>.
   *
   * <li>If we still have more regions that need assignment, again iterate the
   *     least loaded servers, this time giving each one (filling them to
   *     <b>MAX</b>) until we run out.
   *
   * <li>All servers will now either host <b>MIN</b> or <b>MAX</b> regions.
   *
   *     In addition, any server hosting &gt;= <b>MAX</b> regions is guaranteed
   *     to end up with <b>MAX</b> regions at the end of the balancing.  This
   *     ensures the minimal number of regions possible are moved.
   * </ol>
   *
   * TODO: At most, we can reassign away from a particular server as many
   *       regions as it reports as most loaded.
   *       Should we just keep all assignment in memory?  Any objections?
   *       Does this mean we need HeapSize on HMaster?  Or just careful monitor?
   *       (current thinking is we will hold all assignments in memory)
   *
   * @param clusterMap Map of regionservers and their load/region information to
   *                   a list of their most loaded regions
   * @return a list of regions to be moved, including source and destination,
   *         or null if cluster is already balanced
   */
@Override
public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterMap) {
    List<RegionPlan> regionsToReturn = balanceMasterRegions(clusterMap);
    if (regionsToReturn != null || clusterMap == null || clusterMap.size() <= 1) {
        return regionsToReturn;
    }
    if (masterServerName != null && clusterMap.containsKey(masterServerName)) {
        if (clusterMap.size() <= 2) {
            return null;
        }
        clusterMap = new HashMap<>(clusterMap);
        clusterMap.remove(masterServerName);
    }
    long startTime = System.currentTimeMillis();
    // construct a Cluster object with clusterMap and rest of the
    // argument as defaults
    Cluster c = new Cluster(clusterMap, null, this.regionFinder, this.rackManager);
    if (!this.needsBalance(c) && !this.overallNeedsBalance())
        return null;
    ClusterLoadState cs = new ClusterLoadState(clusterMap);
    int numServers = cs.getNumServers();
    NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
    int numRegions = cs.getNumRegions();
    float average = cs.getLoadAverage();
    int max = (int) Math.ceil(average);
    int min = (int) average;
    // Collect the balance parameters so the result can be checked in the debug log.
    StringBuilder strBalanceParam = new StringBuilder();
    strBalanceParam.append("Balance parameter: numRegions=").append(numRegions).append(", numServers=").append(numServers).append(", max=").append(max).append(", min=").append(min);
    LOG.debug(strBalanceParam.toString());
    // Balance the cluster
    // TODO: Look at data block locality or a more complex load to do this
    MinMaxPriorityQueue<RegionPlan> regionsToMove = MinMaxPriorityQueue.orderedBy(rpComparator).create();
    regionsToReturn = new ArrayList<>();
    // Walk down most loaded, pruning each to the max
    int serversOverloaded = 0;
    // flag used to fetch regions from head and tail of list, alternately
    boolean fetchFromTail = false;
    Map<ServerName, BalanceInfo> serverBalanceInfo = new TreeMap<>();
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
        ServerAndLoad sal = server.getKey();
        int load = sal.getLoad();
        if (load <= max) {
            serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0, server.getValue()));
            continue;
        }
        serversOverloaded++;
        List<HRegionInfo> regions = server.getValue();
        int numToOffload = Math.min(load - max, regions.size());
        // account for the out-of-band regions which were assigned to this server
        // after some other region server crashed
        Collections.sort(regions, riComparator);
        int numTaken = 0;
        for (int i = 0; i <= numToOffload; ) {
            // fetch from head
            HRegionInfo hri = regions.get(i);
            if (fetchFromTail) {
                hri = regions.get(regions.size() - 1 - i);
            }
            i++;
            // Don't rebalance special regions.
            if (shouldBeOnMaster(hri) && masterServerName.equals(sal.getServerName()))
                continue;
            regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
            numTaken++;
            if (numTaken >= numToOffload)
                break;
        }
        serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(numToOffload, (-1) * numTaken, server.getValue()));
    }
    int totalNumMoved = regionsToMove.size();
    // Walk down least loaded, filling each to the min
    // number of regions needed to bring all up to min
    int neededRegions = 0;
    fetchFromTail = false;
    Map<ServerName, Integer> underloadedServers = new HashMap<>();
    int maxToTake = numRegions - min;
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        // no more to take
        if (maxToTake == 0)
            break;
        int load = server.getKey().getLoad();
        if (load >= min) {
            // look for other servers which haven't reached min
            continue;
        }
        int regionsToPut = min - load;
        maxToTake -= regionsToPut;
        underloadedServers.put(server.getKey().getServerName(), regionsToPut);
    }
    // number of servers that get new regions
    int serversUnderloaded = underloadedServers.size();
    int incr = 1;
    List<ServerName> sns = Arrays.asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
    Collections.shuffle(sns, RANDOM);
    while (regionsToMove.size() > 0) {
        int cnt = 0;
        int i = incr > 0 ? 0 : underloadedServers.size() - 1;
        for (; i >= 0 && i < underloadedServers.size(); i += incr) {
            if (regionsToMove.isEmpty())
                break;
            ServerName si = sns.get(i);
            int numToTake = underloadedServers.get(si);
            if (numToTake == 0)
                continue;
            addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
            underloadedServers.put(si, numToTake - 1);
            cnt++;
            BalanceInfo bi = serverBalanceInfo.get(si);
            bi.setNumRegionsAdded(bi.getNumRegionsAdded() + 1);
        }
        if (cnt == 0)
            break;
        // iterates underloadedServers in the other direction
        incr = -incr;
    }
    for (Integer i : underloadedServers.values()) {
        // If we still want to take some, increment needed
        neededRegions += i;
    }
    // If we need more to fill min, grab one from each most loaded until enough
    if (neededRegions != 0) {
        // Walk down most loaded, grabbing one from each until we get enough
        for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.descendingMap().entrySet()) {
            BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
            int idx = balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
            if (idx >= server.getValue().size())
                break;
            HRegionInfo region = server.getValue().get(idx);
            // Don't move meta regions.
            if (region.isMetaRegion())
                continue;
            regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
            balanceInfo.setNumRegionsAdded(balanceInfo.getNumRegionsAdded() - 1);
            balanceInfo.setNextRegionForUnload(balanceInfo.getNextRegionForUnload() + 1);
            totalNumMoved++;
            if (--neededRegions == 0) {
                // No more regions needed, done shedding
                break;
            }
        }
    }
    // Walk down least loaded, assigning to each to fill up to min
    for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server : serversByLoad.entrySet()) {
        int regionCount = server.getKey().getLoad();
        if (regionCount >= min)
            break;
        BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
        if (balanceInfo != null) {
            regionCount += balanceInfo.getNumRegionsAdded();
        }
        if (regionCount >= min) {
            continue;
        }
        int numToTake = min - regionCount;
        int numTaken = 0;
        while (numTaken < numToTake && 0 < regionsToMove.size()) {
            addRegionPlan(regionsToMove, fetchFromTail, server.getKey().getServerName(), regionsToReturn);
            numTaken++;
        }
    }
    if (min != max) {
        balanceOverall(regionsToReturn, serverBalanceInfo, fetchFromTail, regionsToMove, max, min);
    }
    long endTime = System.currentTimeMillis();
    if (!regionsToMove.isEmpty() || neededRegions != 0) {
        // Emit data so can diagnose how balancer went astray.
        LOG.warn("regionsToMove=" + totalNumMoved + ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded + ", serversUnderloaded=" + serversUnderloaded);
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<ServerName, List<HRegionInfo>> e : clusterMap.entrySet()) {
            if (sb.length() > 0)
                sb.append(", ");
            sb.append(e.getKey().toString());
            sb.append(" ");
            sb.append(e.getValue().size());
        }
        LOG.warn("Input " + sb.toString());
    }
    // All done!
    LOG.info("Done. Calculated a load balance in " + (endTime - startTime) + "ms. " + "Moving " + totalNumMoved + " regions off of " + serversOverloaded + " overloaded servers onto " + serversUnderloaded + " less loaded servers");
    return regionsToReturn;
}
Also used : HashMap(java.util.HashMap) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ArrayList(java.util.ArrayList) List(java.util.List) TreeMap(java.util.TreeMap) RegionPlan(org.apache.hadoop.hbase.master.RegionPlan) ServerName(org.apache.hadoop.hbase.ServerName) NavigableMap(java.util.NavigableMap) Map(java.util.Map)
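
The MIN/MAX invariant described in the javadoc above is just the floor and ceiling of the per-server average, exactly as computed at the top of the method. An illustrative helper plus a worked example (not balancer API):

public final class BalanceBounds {

    // The two valid per-server region counts from the javadoc above:
    // MIN = floor(average), MAX = ceil(average).
    static int[] targetBounds(int numRegions, int numServers) {
        float average = (float) numRegions / numServers;
        int min = (int) average;            // floor, for non-negative values
        int max = (int) Math.ceil(average); // ceiling
        return new int[] { min, max };
    }

    public static void main(String[] args) {
        // 10 regions on 4 servers: average is 2.5, so after balancing every
        // server should host either 2 or 3 regions.
        int[] bounds = targetBounds(10, 4);
        System.out.println("min=" + bounds[0] + ", max=" + bounds[1]);
    }
}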

Example 19 with RegionPlan

Use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.

From the class StochasticLoadBalancer, method createRegionPlans.

/**
   * Create all of the RegionPlans needed to move from the initial cluster state to the desired
   * state.
   *
   * @param cluster The state of the cluster
   * @return List of RegionPlans that represent the moves needed to get to the desired final state.
   */
private List<RegionPlan> createRegionPlans(Cluster cluster) {
    List<RegionPlan> plans = new LinkedList<>();
    for (int regionIndex = 0; regionIndex < cluster.regionIndexToServerIndex.length; regionIndex++) {
        int initialServerIndex = cluster.initialRegionIndexToServerIndex[regionIndex];
        int newServerIndex = cluster.regionIndexToServerIndex[regionIndex];
        if (initialServerIndex != newServerIndex) {
            HRegionInfo region = cluster.regions[regionIndex];
            ServerName initialServer = cluster.servers[initialServerIndex];
            ServerName newServer = cluster.servers[newServerIndex];
            if (LOG.isTraceEnabled()) {
                LOG.trace("Moving Region " + region.getEncodedName() + " from server " + initialServer.getHostname() + " to " + newServer.getHostname());
            }
            RegionPlan rp = new RegionPlan(region, initialServer, newServer);
            plans.add(rp);
        }
    }
    return plans;
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) RegionPlan(org.apache.hadoop.hbase.master.RegionPlan) ServerName(org.apache.hadoop.hbase.ServerName) LinkedList(java.util.LinkedList)
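
createRegionPlans is a straight diff of two parallel arrays: the region-to-server assignment the walk started from and the one it ended with. The same idea in isolation (array and class names illustrative):

import java.util.ArrayList;
import java.util.List;

public final class AssignmentDiff {

    // Emit one move per region whose server index changed between the
    // initial and final assignments; unchanged regions produce no plan.
    static List<int[]> diff(int[] initialServer, int[] currentServer) {
        List<int[]> moves = new ArrayList<>();
        for (int region = 0; region < currentServer.length; region++) {
            if (initialServer[region] != currentServer[region]) {
                // {regionIndex, fromServerIndex, toServerIndex}
                moves.add(new int[] { region, initialServer[region], currentServer[region] });
            }
        }
        return moves;
    }
}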

Example 20 with RegionPlan

Use of org.apache.hadoop.hbase.master.RegionPlan in project hbase by apache.

From the class StochasticLoadBalancer, method balanceCluster.

/**
   * Given the cluster state this will try to approach an optimal balance. This
   * should always approach the optimal state given enough steps.
   */
@Override
public synchronized List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
    List<RegionPlan> plans = balanceMasterRegions(clusterState);
    if (plans != null || clusterState == null || clusterState.size() <= 1) {
        return plans;
    }
    if (masterServerName != null && clusterState.containsKey(masterServerName)) {
        if (clusterState.size() <= 2) {
            return null;
        }
        clusterState = new HashMap<>(clusterState);
        clusterState.remove(masterServerName);
    }
    // On clusters with lots of HFileLinks or lots of reference files,
    // instantiating the storefile infos can be quite expensive.
    // Allow turning this feature off if the locality cost is not going to
    // be used in any computations.
    RegionLocationFinder finder = null;
    if (this.localityCost != null && this.localityCost.getMultiplier() > 0) {
        finder = this.regionFinder;
    }
    // The clusterState that is given to this method contains the state
    // of all the regions in the table(s) (that's true today).
    // Keep track of servers to iterate through them.
    Cluster cluster = new Cluster(clusterState, loads, finder, rackManager);
    long startTime = EnvironmentEdgeManager.currentTime();
    initCosts(cluster);
    if (!needsBalance(cluster)) {
        return null;
    }
    double currentCost = computeCost(cluster, Double.MAX_VALUE);
    curOverallCost = currentCost;
    for (int i = 0; i < this.curFunctionCosts.length; i++) {
        curFunctionCosts[i] = tempFunctionCosts[i];
    }
    LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost=" + functionCost());
    double initCost = currentCost;
    double newCost = currentCost;
    long computedMaxSteps = Math.min(this.maxSteps, ((long) cluster.numRegions * (long) this.stepsPerRegion * (long) cluster.numServers));
    // Perform a stochastic walk to see if we can get a good fit.
    long step;
    for (step = 0; step < computedMaxSteps; step++) {
        int generatorIdx = RANDOM.nextInt(candidateGenerators.length);
        CandidateGenerator p = candidateGenerators[generatorIdx];
        Cluster.Action action = p.generate(cluster);
        if (action.type == Type.NULL) {
            continue;
        }
        cluster.doAction(action);
        updateCostsWithAction(cluster, action);
        newCost = computeCost(cluster, currentCost);
        // Should this be kept?
        if (newCost < currentCost) {
            currentCost = newCost;
            // save for JMX
            curOverallCost = currentCost;
            for (int i = 0; i < this.curFunctionCosts.length; i++) {
                curFunctionCosts[i] = tempFunctionCosts[i];
            }
        } else {
            // Put things back the way they were before.
            // TODO: undo by remembering old values
            Action undoAction = action.undoAction();
            cluster.doAction(undoAction);
            updateCostsWithAction(cluster, undoAction);
        }
        if (EnvironmentEdgeManager.currentTime() - startTime > maxRunningTime) {
            break;
        }
    }
    long endTime = EnvironmentEdgeManager.currentTime();
    metricsBalancer.balanceCluster(endTime - startTime);
    // update costs metrics
    updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
    if (initCost > currentCost) {
        plans = createRegionPlans(cluster);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Finished computing new load balance plan.  Computation took " + (endTime - startTime) + "ms to try " + step + " different iterations.  Found a solution that moves " + plans.size() + " regions; Going from a computed cost of " + initCost + " to a new cost of " + currentCost);
        }
        return plans;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Could not find a better load balance plan.  Tried " + step + " different configurations in " + (endTime - startTime) + "ms, and did not find anything with a computed cost less than " + initCost);
    }
    return null;
}
Also used : Action(org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action) MoveRegionAction(org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.MoveRegionAction) SwapRegionsAction(org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.SwapRegionsAction) AssignRegionAction(org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.AssignRegionAction) RegionPlan(org.apache.hadoop.hbase.master.RegionPlan)
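
The heart of the balancer is a greedy accept/undo loop: pick a random candidate action, apply it, recompute the cost, and keep the action only if the cost dropped. A stripped-down sketch of that loop (the Action interface and cost supplier are hypothetical stand-ins for the balancer's internals):

import java.util.Random;
import java.util.function.DoubleSupplier;

public final class HillClimb {

    // A reversible mutation of the cluster state.
    interface Action {
        void apply();

        void undo();
    }

    static double run(Action[] candidates, DoubleSupplier cost, long maxSteps,
            Random random) {
        double currentCost = cost.getAsDouble();
        for (long step = 0; step < maxSteps; step++) {
            Action action = candidates[random.nextInt(candidates.length)];
            action.apply();
            double newCost = cost.getAsDouble();
            if (newCost < currentCost) {
                currentCost = newCost; // keep the improvement
            } else {
                action.undo(); // put things back the way they were
            }
        }
        return currentCost;
    }
}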

Aggregations

RegionPlan (org.apache.hadoop.hbase.master.RegionPlan): 38
ServerName (org.apache.hadoop.hbase.ServerName): 32
RegionInfo (org.apache.hadoop.hbase.client.RegionInfo): 24
ArrayList (java.util.ArrayList): 21
List (java.util.List): 19
HashMap (java.util.HashMap): 17
TableName (org.apache.hadoop.hbase.TableName): 16
Map (java.util.Map): 14
Test (org.junit.Test): 14
IOException (java.io.IOException): 11
TreeMap (java.util.TreeMap): 11
CountDownLatch (java.util.concurrent.CountDownLatch): 8
HMaster (org.apache.hadoop.hbase.master.HMaster): 7
MasterServices (org.apache.hadoop.hbase.master.MasterServices): 7
Configuration (org.apache.hadoop.conf.Configuration): 6
HBaseClassTestRule (org.apache.hadoop.hbase.HBaseClassTestRule): 6
HBaseTestingUtil (org.apache.hadoop.hbase.HBaseTestingUtil): 6
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 6
MasterRegion (org.apache.hadoop.hbase.master.region.MasterRegion): 6
MasterTests (org.apache.hadoop.hbase.testclassification.MasterTests): 6