Search in sources :

Example 16 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class RSGroupBasedLoadBalancer method generateGroupMaps.

/**
 * Buckets the given regions by RSGroup name and records the candidate servers of each group.
 *
 * <p>Each region is stored in {@code regionMap} under the group name returned by
 * {@code rsGroupInfoManager.getRSGroupOfTable} for its table. For every key of
 * {@code regionMap}, the result of {@code filterOfflineServers(info, servers)} is then added to
 * {@code serverMap}; a group that ends up without any server gets
 * {@code LoadBalancer.BOGUS_SERVER_NAME} as an entry.
 *
 * <p>NOTE(review): this excerpt has lost its closing braces (and possibly other lines), so the
 * nesting described in the comments below is inferred; confirm against the full source.
 *
 * @param regions regions to bucket by group
 * @param servers servers handed to {@code filterOfflineServers} for every group
 * @param regionMap output map from group name to regions
 * @param serverMap output map from group name to servers
 * @throws HBaseIOException wrapping any {@code IOException} raised while building the maps
 */
private void generateGroupMaps(List<HRegionInfo> regions, List<ServerName> servers, ListMultimap<String, HRegionInfo> regionMap, ListMultimap<String, ServerName> serverMap) throws HBaseIOException {
    try {
        for (HRegionInfo region : regions) {
            String groupName = rsGroupInfoManager.getRSGroupOfTable(region.getTable());
            // A table without a group is only reported in the log.
            if (groupName == null) {
                LOG.warn("Group for table " + region.getTable() + " is null");
            // NOTE(review): presumably executed for every region (the closing brace of the "if"
            // above is missing from this excerpt), which would file the region under a null key
            // when no group was found; confirm.
            regionMap.put(groupName, region);
        for (String groupKey : regionMap.keySet()) {
            RSGroupInfo info = rsGroupInfoManager.getRSGroup(groupKey);
            serverMap.putAll(groupKey, filterOfflineServers(info, servers));
            // No server was added for this group: insert the placeholder server name instead.
            if (serverMap.get(groupKey).size() < 1) {
                serverMap.put(groupKey, LoadBalancer.BOGUS_SERVER_NAME);
    } catch (IOException e) {
        // Keep the original exception as the cause.
        throw new HBaseIOException("Failed to generate group maps", e);
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException)

Example 17 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class RSGroupBasedLoadBalancer method retainAssignment.

/**
 * Computes an assignment for the given regions, treating the regions reported by
 * {@code getMisplacedRegions} separately from the rest.
 *
 * <p>Regions that are not misplaced are grouped by the RSGroup of their table and handed, group
 * by group, to {@code internalBalancer.retainAssignment} together with the servers returned by
 * {@code filterOfflineServers}; in the visible code a group with no candidate server contributes
 * nothing to the result. For each misplaced region a server is picked with
 * {@code internalBalancer.randomAssignment}; when that yields {@code null}, an entry for
 * {@code LoadBalancer.BOGUS_SERVER_NAME} is created instead.
 *
 * <p>NOTE(review): this excerpt has lost its closing braces and some statements; confirm the
 * nesting against the full source.
 *
 * @param regions regions mapped to a server (presumably their current location; confirm)
 * @param servers servers handed to {@code filterOfflineServers}
 * @return map from server to the regions assigned to it
 * @throws HBaseIOException wrapping any {@code IOException} raised while computing the result
 */
public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions, List<ServerName> servers) throws HBaseIOException {
    try {
        Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<>();
        ListMultimap<String, HRegionInfo> groupToRegion = ArrayListMultimap.create();
        Set<HRegionInfo> misplacedRegions = getMisplacedRegions(regions);
        // Group every region that is not misplaced by the RSGroup of its table.
        for (HRegionInfo region : regions.keySet()) {
            if (!misplacedRegions.contains(region)) {
                String groupName = rsGroupInfoManager.getRSGroupOfTable(region.getTable());
                groupToRegion.put(groupName, region);
        // At this point groupToRegion holds only the regions that were not reported as misplaced.
        for (String key : groupToRegion.keySet()) {
            Map<HRegionInfo, ServerName> currentAssignmentMap = new TreeMap<HRegionInfo, ServerName>();
            List<HRegionInfo> regionList = groupToRegion.get(key);
            RSGroupInfo info = rsGroupInfoManager.getRSGroup(key);
            List<ServerName> candidateList = filterOfflineServers(info, servers);
            // Copy the caller-supplied server of each region in this group.
            for (HRegionInfo region : regionList) {
                currentAssignmentMap.put(region, regions.get(region));
            // Delegate to the internal balancer only when the group has at least one candidate.
            if (candidateList.size() > 0) {
                assignments.putAll(this.internalBalancer.retainAssignment(currentAssignmentMap, candidateList));
        // Misplaced regions get a server picked by the internal balancer from their group's candidates.
        for (HRegionInfo region : misplacedRegions) {
            String groupName = rsGroupInfoManager.getRSGroupOfTable(region.getTable());
            RSGroupInfo info = rsGroupInfoManager.getRSGroup(groupName);
            List<ServerName> candidateList = filterOfflineServers(info, servers);
            ServerName server = this.internalBalancer.randomAssignment(region, candidateList);
            if (server != null) {
                // Make sure the chosen server has a list to receive the region.
                // NOTE(review): the statement that adds the region to that list is not visible in
                // this excerpt; confirm.
                if (!assignments.containsKey(server)) {
                    assignments.put(server, new ArrayList<>());
            } else {
                // If no server is available, assign to bogus so it ends up in RIT.
                // NOTE(review): the statement that adds the region to the bogus server's list is
                // not visible in this excerpt; confirm.
                if (!assignments.containsKey(LoadBalancer.BOGUS_SERVER_NAME)) {
                    assignments.put(LoadBalancer.BOGUS_SERVER_NAME, new ArrayList<>());
        return assignments;
    } catch (IOException e) {
        // Keep the original exception as the cause.
        throw new HBaseIOException("Failed to do online retain assignment", e);
Also used : HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) TreeMap(java.util.TreeMap) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List)

Example 18 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class AssignmentManager method assign.

  /**
   * Caller must hold lock on the passed <code>state</code> object.
   * @param state
   * @param forceNewPlan
   */
private void assign(RegionState state, boolean forceNewPlan) {
    // Attempts to open the region of the given state on the destination of a region plan,
    // looping over attempts 1..maximumAttempts (the counter is reset for meta regions), and
    // always reports the elapsed time to metricsAssignmentManager in the finally block.
    // NOTE(review): this excerpt has lost its closing braces, several statements (for example
    // early exits) and the leading part of some lines; the NOTE(review) comments below flag the
    // gaps that are visible. Confirm all of them against the full source.
    long startTime = EnvironmentEdgeManager.currentTime();
    try {
        Configuration conf = server.getConfiguration();
        RegionPlan plan = null;
        // Deadline for waiting on a server that is not running yet; negative until first set.
        long maxWaitTime = -1;
        HRegionInfo region = state.getRegion();
        Throwable previousException = null;
        for (int i = 1; i <= maximumAttempts; i++) {
            if (server.isStopped() || server.isAborted()) {
      // NOTE(review): the start of the next statement is missing from this excerpt (presumably a
      // LOG call); confirm.
      "Skip assigning " + region.getRegionNameAsString() + ", the server is stopped/aborted");
            if (plan == null) {
                // Get a server for the region at first
                try {
                    plan = getRegionPlan(region, forceNewPlan);
                } catch (HBaseIOException e) {
                    LOG.warn("Failed to get region plan", e);
            if (plan == null) {
                LOG.warn("Unable to determine a plan to assign " + region);
                // For meta region, we have to keep retrying until succeeding
                if (region.isMetaRegion()) {
                    if (i == maximumAttempts) {
                        // re-set attempt count to 0 for at least 1 retry
                        i = 0;
                        LOG.warn("Unable to determine a plan to assign a hbase:meta region " + region + " after maximumAttempts (" + this.maximumAttempts + "). Reset attempts count and continue retrying.");
                regionStates.updateRegionState(region, State.FAILED_OPEN);
  // NOTE(review): the start of the next statement is missing from this excerpt (presumably a
  // LOG call); confirm.
  "Assigning " + region.getRegionNameAsString() + " to " + plan.getDestination());
            // Transition RegionState to PENDING_OPEN
            regionStates.updateRegionState(region, State.PENDING_OPEN, plan.getDestination());
            boolean needNewPlan = false;
            final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() + " to " + plan.getDestination();
            try {
                List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
                if (shouldAssignFavoredNodes(region)) {
                    favoredNodes = server.getFavoredNodesManager().getFavoredNodesWithDNPort(region);
                serverManager.sendRegionOpen(plan.getDestination(), region, favoredNodes);
                // we're done
            } catch (Throwable t) {
                // Work with the exception carried inside a RemoteException, not the wrapper.
                if (t instanceof RemoteException) {
                    t = ((RemoteException) t).unwrapRemoteException();
                previousException = t;
                // Should we wait a little before retrying? If the server is starting it's yes.
                boolean hold = (t instanceof ServerNotRunningYetException);
                // In case socket is timed out and the region server is still online,
                // the openRegion RPC could have been accepted by the server and
                // just the response didn't go through.  So we will retry to
                // open the region on the same server.
                // NOTE(review): the type operand of the instanceof below is missing from this
                // excerpt; the comment above suggests a socket-timeout exception type; confirm.
                boolean retry = !hold && (t instanceof && this.serverManager.isServerOnline(plan.getDestination()));
                if (hold) {
                    LOG.warn(assignMsg + ", waiting a little before trying on the same region server " + "try=" + i + " of " + this.maximumAttempts, t);
                    // Compute the waiting deadline only once, from the configured startup wait time.
                    if (maxWaitTime < 0) {
                        maxWaitTime = EnvironmentEdgeManager.currentTime() + this.server.getConfiguration().getLong("hbase.regionserver.rpc.startup.waittime", 60000);
                    try {
                        long now = EnvironmentEdgeManager.currentTime();
                        if (now < maxWaitTime) {
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Server is not yet up; waiting up to " + (maxWaitTime - now) + "ms", t);
                            // NOTE(review): the wait and the counter reset announced by the next
                            // comment are not visible in this excerpt; the InterruptedException
                            // handler below suggests a sleep belonged here; confirm.
                            // reset the try count
                        } else {
                            // The deadline has passed: ask for a different plan.
                            LOG.debug("Server is not up for a while; try a new one", t);
                            needNewPlan = true;
                    } catch (InterruptedException ie) {
                        LOG.warn("Failed to assign " + region.getRegionNameAsString() + " since interrupted", ie);
                        regionStates.updateRegionState(region, State.FAILED_OPEN);
                } else if (retry) {
                    // we want to retry as many times as needed as long as the RS is not dead.
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(assignMsg + ", trying to assign to the same region server due ", t);
                } else {
                    needNewPlan = true;
                    LOG.warn(assignMsg + ", trying to assign elsewhere instead;" + " try=" + i + " of " + this.maximumAttempts, t);
            if (i == this.maximumAttempts) {
                // For meta region, we have to keep retrying until succeeding
                if (region.isMetaRegion()) {
                    // re-set attempt count to 0 for at least 1 retry
                    i = 0;
                    LOG.warn(assignMsg + ", trying to assign a hbase:meta region reached to maximumAttempts (" + this.maximumAttempts + ").  Reset attempt counts and continue retrying.");
                } else {
                    // This is the last try.
            // reassigning to same RS.
            if (needNewPlan) {
                // Force a new plan and reassign. Will return null if no servers.
                // The new plan could be the same as the existing plan since we don't
                // exclude the server of the original plan, which should not be
                // excluded since it could be the only server up now.
                RegionPlan newPlan = null;
                try {
                    newPlan = getRegionPlan(region, true);
                } catch (HBaseIOException e) {
                    LOG.warn("Failed to get region plan", e);
                if (newPlan == null) {
                    regionStates.updateRegionState(region, State.FAILED_OPEN);
                    LOG.warn("Unable to find a viable location to assign region " + region.getRegionNameAsString());
                // NOTE(review): newPlan is dereferenced below; presumably the null branch above
                // exits first (the exit is not visible in this excerpt); confirm.
                if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) {
                    // Clean out plan we failed execute and one that doesn't look like it'll
                    // succeed anyways; we need a new plan!
                    // Transition back to OFFLINE
                    regionStates.updateRegionState(region, State.OFFLINE);
                    plan = newPlan;
                } else if (plan.getDestination().equals(newPlan.getDestination()) && previousException instanceof FailedServerException) {
                    // Same destination after a FailedServerException: sleep for the configured
                    // failed-server expiry plus 1 before going on.
                    try {
              // NOTE(review): the start of the next statement is missing from this excerpt
              // (presumably a LOG call); confirm.
              "Trying to re-assign " + region.getRegionNameAsString() + " to the same failed server.");
                        Thread.sleep(1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT));
                    } catch (InterruptedException ie) {
                        LOG.warn("Failed to assign " + region.getRegionNameAsString() + " since interrupted", ie);
                        regionStates.updateRegionState(region, State.FAILED_OPEN);
        // Run out of attempts
        regionStates.updateRegionState(region, State.FAILED_OPEN);
    } finally {
        // Record how long this call took, whatever the outcome.
        metricsAssignmentManager.updateAssignmentTime(EnvironmentEdgeManager.currentTime() - startTime);
Also used : Configuration(org.apache.hadoop.conf.Configuration) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) FailedServerException(org.apache.hadoop.hbase.ipc.FailedServerException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName) RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 19 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class AssignmentManager method acceptPlan.

/**
 * Walks the given assignment plan and wakes the procedure events of the regions it covers.
 *
 * <p>For every region in the plan the matching {@code RegionStateNode} is looked up in
 * {@code regions}. Unless the target is {@code LoadBalancer.BOGUS_SERVER_NAME} and the node
 * belongs to a system table, the node's procedure event is collected; the collected events are
 * then passed to {@code ProcedureEvent.wakeEvents}.
 *
 * <p>NOTE(review): this excerpt has lost its closing braces and some statements; confirm the
 * nesting against the full source.
 *
 * @param regions region info mapped to its state node
 * @param plan target server mapped to the regions planned for it
 * @throws HBaseIOException if {@code plan} is empty
 */
private void acceptPlan(final HashMap<RegionInfo, RegionStateNode> regions, final Map<ServerName, List<RegionInfo>> plan) throws HBaseIOException {
    // Sized for the case where every region yields an event.
    final ProcedureEvent<?>[] events = new ProcedureEvent[regions.size()];
    final long st = EnvironmentEdgeManager.currentTime();
    if (plan.isEmpty()) {
        throw new HBaseIOException("unable to compute plans for regions=" + regions.size());
    int evcount = 0;
    for (Map.Entry<ServerName, List<RegionInfo>> entry : plan.entrySet()) {
        final ServerName server = entry.getKey();
        for (RegionInfo hri : entry.getValue()) {
            final RegionStateNode regionNode = regions.get(hri);
            if (server.equals(LoadBalancer.BOGUS_SERVER_NAME) && regionNode.isSystemTable()) {
                // NOTE(review): the bodies of this try/finally are missing from the excerpt, so
                // what happens to a system-table region planned for the bogus server cannot be
                // told from here; confirm.
                try {
                } finally {
            } else {
                events[evcount++] = regionNode.getProcedureEvent();
    ProcedureEvent.wakeEvents(getProcedureScheduler(), events);
    final long et = EnvironmentEdgeManager.currentTime();
    if (LOG.isTraceEnabled()) {
        LOG.trace("ASSIGN ACCEPT " + events.length + " -> " + StringUtils.humanTimeDiff(et - st));
Also used : ProcedureEvent(org.apache.hadoop.hbase.procedure2.ProcedureEvent) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) ServerName(org.apache.hadoop.hbase.ServerName) List(java.util.List) ArrayList(java.util.ArrayList) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) Map(java.util.Map) HashMap(java.util.HashMap)

Example 20 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class TransitRegionStateProcedure method confirmOpened.

/**
 * Decides how the procedure continues once the open of the given region node has been checked.
 *
 * <p>When the node is in {@code State.OPEN}, the retry counter is cleared and the procedure
 * finishes if confirm-opened is its last state, otherwise it continues. When the node is not
 * open, the failed open is counted: once the count reaches the assignment manager's maximum
 * attempts the procedure is failed with a {@code RetriesExhaustedException}; below that, a new
 * plan is forced and the procedure either continues immediately or throws to back off.
 *
 * <p>NOTE(review): this excerpt has lost its closing braces and some statements; confirm the
 * nesting against the full source.
 *
 * @param env procedure environment giving access to the assignment manager
 * @param regionNode state node of the region being confirmed
 * @return {@code Flow.NO_MORE_STATE} when the procedure is done or has failed, otherwise
 *         {@code Flow.HAS_MORE_STATE}
 * @throws IOException in the visible code, an {@code HBaseIOException} when the retry count
 *         exceeds {@code getAssignRetryImmediatelyMaxAttempts()}
 */
private Flow confirmOpened(MasterProcedureEnv env, RegionStateNode regionNode) throws IOException {
    if (regionNode.isInState(State.OPEN)) {
        retryCounter = null;
        if (lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED) {
            // we are the last state, finish
            ServerCrashProcedure.updateProgress(env, getParentProcId());
            return Flow.NO_MORE_STATE;
        // It is possible that we arrive here but confirm opened is not the last state, for example,
        // when merging or splitting a region, we unassign the region from a RS and the RS is crashed,
        // then there will be recovered edits for this region, we'd better make the region online
        // again and then unassign it, otherwise we have to fail the merge/split procedure as we may
        // lose data.
        return Flow.HAS_MORE_STATE;
    // The region is not open: count this failed open and compare against the configured maximum.
    int retries = env.getAssignmentManager().getRegionStates().addToFailedOpen(regionNode).incrementAndGetRetries();
    // NOTE(review): a second statement is fused onto the next line and its start is missing from
    // this excerpt (presumably a LOG call taking the "Retry={} of max={}" format); confirm.
    int maxAttempts = env.getAssignmentManager().getAssignMaxAttempts();"Retry={} of max={}; {}; {}", retries, maxAttempts, this, regionNode.toShortString());
    if (retries >= maxAttempts) {
        // Out of attempts: report the failed open and fail the procedure.
        env.getAssignmentManager().regionFailedOpen(regionNode, true);
        setFailure(getClass().getSimpleName(), new RetriesExhaustedException("Max attempts " + env.getAssignmentManager().getAssignMaxAttempts() + " exceeded"));
        return Flow.NO_MORE_STATE;
    env.getAssignmentManager().regionFailedOpen(regionNode, false);
    // we failed to assign the region, force a new plan
    forceNewPlan = true;
    if (retries > env.getAssignmentManager().getAssignRetryImmediatelyMaxAttempts()) {
        // Throw exception to backoff and retry when failed open too many times
        throw new HBaseIOException("Failed confirm OPEN of " + regionNode + " (remote log may yield more detail on why).");
    } else {
        // Here we do not throw exception because we want the region to be online ASAP
        return Flow.HAS_MORE_STATE;
Also used : RetriesExhaustedException(org.apache.hadoop.hbase.client.RetriesExhaustedException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException)


HBaseIOException (org.apache.hadoop.hbase.HBaseIOException)36 IOException (java.io.IOException) ServerName (org.apache.hadoop.hbase.ServerName)17 ArrayList (java.util.ArrayList)13 RegionInfo (org.apache.hadoop.hbase.client.RegionInfo)13 List (java.util.List)8 HashMap (java.util.HashMap)7 InterruptedIOException (java.io.InterruptedIOException) Map (java.util.Map)5 DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException)5 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)5 Test (org.junit.Test)5 TreeMap (java.util.TreeMap)4 Configuration (org.apache.hadoop.conf.Configuration)4 NonNull (edu.umd.cs.findbugs.annotations.NonNull)3 ExecutionException (java.util.concurrent.ExecutionException)3 RegionLocations (org.apache.hadoop.hbase.RegionLocations)3 TableName (org.apache.hadoop.hbase.TableName)3 FavoredNodeAssignmentHelper (org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper)3 RSGroupAdminEndpoint (org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint)3