Search in sources :

Example 16 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class RSGroupBasedLoadBalancer method generateGroupMaps.

/**
 * Populates the two supplied multimaps, both keyed by RSGroup name.
 *
 * <p>{@code regionMap} receives every region under the group name reported for its table;
 * a {@code null} group name is logged and still used as the key. {@code serverMap} then
 * receives, for each key of {@code regionMap}, the servers returned by
 * {@code filterOfflineServers}; a key that ends up with no servers is given
 * {@link LoadBalancer#BOGUS_SERVER_NAME} as its single entry.
 *
 * @param regions regions to bucket by group
 * @param servers servers passed to {@code filterOfflineServers} for each group
 * @param regionMap output: group name to regions
 * @param serverMap output: group name to servers
 * @throws HBaseIOException wrapping any {@link IOException} raised while the maps are built
 */
private void generateGroupMaps(List<HRegionInfo> regions, List<ServerName> servers, ListMultimap<String, HRegionInfo> regionMap, ListMultimap<String, ServerName> serverMap) throws HBaseIOException {
    try {
        // Pass 1: bucket the regions by the group of their table.
        for (HRegionInfo hri : regions) {
            final String group = rsGroupInfoManager.getRSGroupOfTable(hri.getTable());
            if (group == null) {
                LOG.warn("Group for table " + hri.getTable() + " is null");
            }
            regionMap.put(group, hri);
        }
        // Pass 2: resolve the servers for every group that owns at least one region.
        for (String group : regionMap.keySet()) {
            final RSGroupInfo groupInfo = rsGroupInfoManager.getRSGroup(group);
            final List<ServerName> groupServers = filterOfflineServers(groupInfo, servers);
            serverMap.putAll(group, groupServers);
            if (serverMap.get(group).isEmpty()) {
                // Nothing to place the group's regions on: use the placeholder server.
                serverMap.put(group, LoadBalancer.BOGUS_SERVER_NAME);
            }
        }
    } catch (IOException ioe) {
        throw new HBaseIOException("Failed to generate group maps", ioe);
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException)

Example 17 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class RSGroupBasedLoadBalancer method retainAssignment.

/**
 * Computes a retain-style assignment that respects RSGroup membership.
 *
 * <p>Regions not reported by {@code getMisplacedRegions} are bucketed by the group of their
 * table and handed, one group at a time, to the internal balancer's {@code retainAssignment}
 * together with that group's candidate servers. Misplaced regions are placed individually
 * through the internal balancer's {@code randomAssignment}.
 *
 * <p>Whenever no server is available for a region - either because its group has no candidate
 * servers or because {@code randomAssignment} returned {@code null} - the region is assigned to
 * {@link LoadBalancer#BOGUS_SERVER_NAME} rather than being left out of the result, so both
 * paths of this method treat "no server" the same way.
 *
 * @param regions current region-to-server mapping to retain where possible
 * @param servers servers passed to {@code filterOfflineServers} to obtain each group's candidates
 * @return the computed assignment, keyed by destination server
 * @throws HBaseIOException wrapping any {@link IOException} raised while computing the plan
 */
@Override
public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions, List<ServerName> servers) throws HBaseIOException {
    try {
        Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<>();
        ListMultimap<String, HRegionInfo> groupToRegion = ArrayListMultimap.create();
        Set<HRegionInfo> misplacedRegions = getMisplacedRegions(regions);
        // Bucket the regions that are not misplaced by the group of their table.
        for (HRegionInfo region : regions.keySet()) {
            if (!misplacedRegions.contains(region)) {
                String groupName = rsGroupInfoManager.getRSGroupOfTable(region.getTable());
                groupToRegion.put(groupName, region);
            }
        }
        // Run the internal balancer once per group, restricted to that group's candidates.
        for (String key : groupToRegion.keySet()) {
            List<HRegionInfo> regionList = groupToRegion.get(key);
            RSGroupInfo info = rsGroupInfoManager.getRSGroup(key);
            List<ServerName> candidateList = filterOfflineServers(info, servers);
            if (candidateList.isEmpty()) {
                // No candidate server for this group: park its regions on the bogus server
                // instead of silently dropping them from the plan.
                assignments.computeIfAbsent(LoadBalancer.BOGUS_SERVER_NAME, k -> new ArrayList<>()).addAll(regionList);
                continue;
            }
            Map<HRegionInfo, ServerName> currentAssignmentMap = new TreeMap<>();
            for (HRegionInfo region : regionList) {
                currentAssignmentMap.put(region, regions.get(region));
            }
            assignments.putAll(this.internalBalancer.retainAssignment(currentAssignmentMap, candidateList));
        }
        // Misplaced regions get a fresh random server from their own group.
        for (HRegionInfo region : misplacedRegions) {
            String groupName = rsGroupInfoManager.getRSGroupOfTable(region.getTable());
            RSGroupInfo info = rsGroupInfoManager.getRSGroup(groupName);
            List<ServerName> candidateList = filterOfflineServers(info, servers);
            ServerName server = this.internalBalancer.randomAssignment(region, candidateList);
            // If no server is available, assign to bogus so the region ends up in RIT.
            ServerName target = (server != null) ? server : LoadBalancer.BOGUS_SERVER_NAME;
            assignments.computeIfAbsent(target, k -> new ArrayList<>()).add(region);
        }
        return assignments;
    } catch (IOException e) {
        throw new HBaseIOException("Failed to do online retain assignment", e);
    }
}
Also used : HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) TreeMap(java.util.TreeMap) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List)

Example 18 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class AssignmentManager method assign.

/**
 * Tries to open the region of the given state on a region server, retrying inside a loop
 * bounded by {@code maximumAttempts}.
 *
 * <p>Each iteration obtains a region plan if none is held, moves the region to
 * {@code PENDING_OPEN} and sends the open request to the plan's destination. The method
 * returns as soon as the request has been sent without an exception. On failure the region
 * is either retried on the same server (without consuming an attempt) or a new plan is
 * requested. hbase:meta regions reset the attempt counter instead of giving up; any other
 * region is moved to {@code FAILED_OPEN} when no plan can be found, when the thread is
 * interrupted while sleeping, or when the attempts run out.
 *
 * <p>The elapsed time is always reported to {@code metricsAssignmentManager}, whatever the
 * outcome.
 *
 * <p>Caller must hold lock on the passed <code>state</code> object.
 *
 * @param state region state whose region ({@code state.getRegion()}) is to be assigned
 * @param forceNewPlan passed to {@code getRegionPlan} for the first plan lookup
 */
private void assign(RegionState state, boolean forceNewPlan) {
    long startTime = EnvironmentEdgeManager.currentTime();
    try {
        Configuration conf = server.getConfiguration();
        RegionPlan plan = null;
        // Deadline for waiting on a server that is still starting; -1 means "not set yet".
        long maxWaitTime = -1;
        HRegionInfo region = state.getRegion();
        Throwable previousException = null;
        // NOTE: the loop counter is adjusted inside the body (i--, i = 0) to grant extra tries.
        for (int i = 1; i <= maximumAttempts; i++) {
            if (server.isStopped() || server.isAborted()) {
                LOG.info("Skip assigning " + region.getRegionNameAsString() + ", the server is stopped/aborted");
                return;
            }
            if (plan == null) {
                // Get a server for the region at first
                try {
                    plan = getRegionPlan(region, forceNewPlan);
                } catch (HBaseIOException e) {
                    // Logged only; the null plan is handled just below.
                    LOG.warn("Failed to get region plan", e);
                }
            }
            if (plan == null) {
                LOG.warn("Unable to determine a plan to assign " + region);
                // For meta region, we have to keep retrying until succeeding
                if (region.isMetaRegion()) {
                    if (i == maximumAttempts) {
                        // re-set attempt count to 0 for at least 1 retry
                        // (the loop's i++ brings it back to 1 after the continue)
                        i = 0;
                        LOG.warn("Unable to determine a plan to assign a hbase:meta region " + region + " after maximumAttempts (" + this.maximumAttempts + "). Reset attempts count and continue retrying.");
                    }
                    waitForRetryingMetaAssignment();
                    continue;
                }
                regionStates.updateRegionState(region, State.FAILED_OPEN);
                return;
            }
            LOG.info("Assigning " + region.getRegionNameAsString() + " to " + plan.getDestination());
            // Transition RegionState to PENDING_OPEN
            regionStates.updateRegionState(region, State.PENDING_OPEN, plan.getDestination());
            boolean needNewPlan = false;
            final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() + " to " + plan.getDestination();
            try {
                List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
                if (shouldAssignFavoredNodes(region)) {
                    favoredNodes = server.getFavoredNodesManager().getFavoredNodesWithDNPort(region);
                }
                serverManager.sendRegionOpen(plan.getDestination(), region, favoredNodes);
                // we're done
                return;
            } catch (Throwable t) {
                // Classify on the underlying exception rather than the RPC wrapper.
                if (t instanceof RemoteException) {
                    t = ((RemoteException) t).unwrapRemoteException();
                }
                previousException = t;
                // Should we wait a little before retrying? If the server is starting it's yes.
                boolean hold = (t instanceof ServerNotRunningYetException);
                // In case socket is timed out and the region server is still online,
                // the openRegion RPC could have been accepted by the server and
                // just the response didn't go through.  So we will retry to
                // open the region on the same server.
                boolean retry = !hold && (t instanceof java.net.SocketTimeoutException && this.serverManager.isServerOnline(plan.getDestination()));
                if (hold) {
                    LOG.warn(assignMsg + ", waiting a little before trying on the same region server " + "try=" + i + " of " + this.maximumAttempts, t);
                    // The deadline is computed once, on the first "not running yet" failure.
                    if (maxWaitTime < 0) {
                        maxWaitTime = EnvironmentEdgeManager.currentTime() + this.server.getConfiguration().getLong("hbase.regionserver.rpc.startup.waittime", 60000);
                    }
                    try {
                        long now = EnvironmentEdgeManager.currentTime();
                        if (now < maxWaitTime) {
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Server is not yet up; waiting up to " + (maxWaitTime - now) + "ms", t);
                            }
                            Thread.sleep(100);
                            // reset the try count
                            // (cancels the loop's i++, so this wait does not consume an attempt)
                            i--;
                        } else {
                            LOG.debug("Server is not up for a while; try a new one", t);
                            needNewPlan = true;
                        }
                    } catch (InterruptedException ie) {
                        LOG.warn("Failed to assign " + region.getRegionNameAsString() + " since interrupted", ie);
                        regionStates.updateRegionState(region, State.FAILED_OPEN);
                        // Restore the interrupt flag for callers before giving up.
                        Thread.currentThread().interrupt();
                        return;
                    }
                } else if (retry) {
                    // we want to retry as many times as needed as long as the RS is not dead.
                    i--;
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(assignMsg + ", trying to assign to the same region server due ", t);
                    }
                } else {
                    needNewPlan = true;
                    LOG.warn(assignMsg + ", trying to assign elsewhere instead;" + " try=" + i + " of " + this.maximumAttempts, t);
                }
            }
            if (i == this.maximumAttempts) {
                // For meta region, we have to keep retrying until succeeding
                if (region.isMetaRegion()) {
                    // re-set attempt count to 0 for at least 1 retry
                    i = 0;
                    LOG.warn(assignMsg + ", trying to assign a hbase:meta region reached to maximumAttempts (" + this.maximumAttempts + ").  Reset attempt counts and continue retrying.");
                    waitForRetryingMetaAssignment();
                } else {
                    // This is the last try.
                    // Skip re-planning: the loop condition fails after i++ and control falls
                    // through to the FAILED_OPEN update after the loop.
                    continue;
                }
            }
            // needNewPlan is set above when the failure should not simply be retried against
            // the current destination; otherwise the loop retries with the existing plan.
            if (needNewPlan) {
                // Force a new plan and reassign. Will return null if no servers.
                // The new plan could be the same as the existing plan since we don't
                // exclude the server of the original plan, which should not be
                // excluded since it could be the only server up now.
                RegionPlan newPlan = null;
                try {
                    newPlan = getRegionPlan(region, true);
                } catch (HBaseIOException e) {
                    LOG.warn("Failed to get region plan", e);
                }
                if (newPlan == null) {
                    regionStates.updateRegionState(region, State.FAILED_OPEN);
                    LOG.warn("Unable to find a viable location to assign region " + region.getRegionNameAsString());
                    return;
                }
                if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) {
                    // Clean out plan we failed execute and one that doesn't look like it'll
                    // succeed anyways; we need a new plan!
                    // Transition back to OFFLINE
                    regionStates.updateRegionState(region, State.OFFLINE);
                    plan = newPlan;
                } else if (plan.getDestination().equals(newPlan.getDestination()) && previousException instanceof FailedServerException) {
                    // Same destination and the last failure was a FailedServerException:
                    // sleep for the configured failed-server expiry plus 1 ms before retrying.
                    try {
                        LOG.info("Trying to re-assign " + region.getRegionNameAsString() + " to the same failed server.");
                        Thread.sleep(1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT));
                    } catch (InterruptedException ie) {
                        LOG.warn("Failed to assign " + region.getRegionNameAsString() + " since interrupted", ie);
                        regionStates.updateRegionState(region, State.FAILED_OPEN);
                        Thread.currentThread().interrupt();
                        return;
                    }
                }
            }
        }
        // Run out of attempts
        regionStates.updateRegionState(region, State.FAILED_OPEN);
    } finally {
        // Recorded on every exit path, including the early returns above.
        metricsAssignmentManager.updateAssignmentTime(EnvironmentEdgeManager.currentTime() - startTime);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) FailedServerException(org.apache.hadoop.hbase.ipc.FailedServerException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName) RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 19 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class AssignmentManager method acceptPlan.

/**
 * Applies a computed assignment plan to the given region nodes.
 *
 * <p>Every region in the plan has its location set to the planned server. A system-table
 * region planned onto {@link LoadBalancer#BOGUS_SERVER_NAME} is added to
 * {@code pendingAssignQueue} under {@code assignQueueLock}; for every other region its
 * procedure event is collected, and the collected events are woken in one call at the end.
 * The event array is sized by {@code regions.size()}, so slots for queued regions stay
 * {@code null}.
 *
 * @param regions region nodes keyed by region info; looked up for every region in the plan
 * @param plan destination server to the regions planned onto it
 * @throws HBaseIOException if {@code plan} is empty
 */
private void acceptPlan(final HashMap<RegionInfo, RegionStateNode> regions, final Map<ServerName, List<RegionInfo>> plan) throws HBaseIOException {
    final ProcedureEvent<?>[] events = new ProcedureEvent[regions.size()];
    final long startedAt = EnvironmentEdgeManager.currentTime();
    if (plan.isEmpty()) {
        throw new HBaseIOException("unable to compute plans for regions=" + regions.size());
    }
    int collected = 0;
    for (Map.Entry<ServerName, List<RegionInfo>> planned : plan.entrySet()) {
        final ServerName target = planned.getKey();
        for (RegionInfo regionInfo : planned.getValue()) {
            final RegionStateNode node = regions.get(regionInfo);
            node.setRegionLocation(target);
            if (!target.equals(LoadBalancer.BOGUS_SERVER_NAME) || !node.isSystemTable()) {
                // Common case: remember the event so it can be woken below.
                events[collected++] = node.getProcedureEvent();
                continue;
            }
            // System-table region with only the placeholder server: queue it instead.
            assignQueueLock.lock();
            try {
                pendingAssignQueue.add(node);
            } finally {
                assignQueueLock.unlock();
            }
        }
    }
    ProcedureEvent.wakeEvents(getProcedureScheduler(), events);
    final long finishedAt = EnvironmentEdgeManager.currentTime();
    if (LOG.isTraceEnabled()) {
        LOG.trace("ASSIGN ACCEPT " + events.length + " -> " + StringUtils.humanTimeDiff(finishedAt - startedAt));
    }
}
Also used : ProcedureEvent(org.apache.hadoop.hbase.procedure2.ProcedureEvent) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) ServerName(org.apache.hadoop.hbase.ServerName) List(java.util.List) ArrayList(java.util.ArrayList) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) Map(java.util.Map) HashMap(java.util.HashMap)

Example 20 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class TransitRegionStateProcedure method confirmOpened.

/**
 * Handles the "confirm opened" step of the region state transition.
 *
 * <p>If the region is in state {@code OPEN}, the retry counter is cleared and the procedure
 * either finishes (when confirm-opened is the last state) or moves on to the close state.
 * Otherwise the failed open is recorded: once the retry count reaches the assignment
 * manager's maximum the procedure is failed with a {@link RetriesExhaustedException};
 * below that, a new plan is forced and the procedure goes back to choosing an assign
 * candidate, either immediately or, past the immediate-retry limit, by throwing.
 *
 * @param env master procedure environment giving access to the assignment manager
 * @param regionNode node of the region being transitioned
 * @return {@code Flow.NO_MORE_STATE} when the procedure is finished or failed,
 *         {@code Flow.HAS_MORE_STATE} when another state has been scheduled
 * @throws IOException an {@link HBaseIOException} when the open has failed more often than
 *         the immediate-retry limit allows
 */
private Flow confirmOpened(MasterProcedureEnv env, RegionStateNode regionNode) throws IOException {
    if (regionNode.isInState(State.OPEN)) {
        retryCounter = null;
        if (lastState != RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED) {
            // It is possible that we arrive here but confirm opened is not the last state, for
            // example, when merging or splitting a region, we unassign the region from a RS and
            // the RS is crashed, then there will be recovered edits for this region, we'd better
            // make the region online again and then unassign it, otherwise we have to fail the
            // merge/split procedure as we may lose data.
            setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE);
            return Flow.HAS_MORE_STATE;
        }
        // we are the last state, finish
        regionNode.unsetProcedure(this);
        ServerCrashProcedure.updateProgress(env, getParentProcId());
        return Flow.NO_MORE_STATE;
    }

    final int failedOpens = env.getAssignmentManager().getRegionStates().addToFailedOpen(regionNode).incrementAndGetRetries();
    final int attemptLimit = env.getAssignmentManager().getAssignMaxAttempts();
    LOG.info("Retry={} of max={}; {}; {}", failedOpens, attemptLimit, this, regionNode.toShortString());

    if (failedOpens >= attemptLimit) {
        // Out of attempts: fail the procedure.
        env.getAssignmentManager().regionFailedOpen(regionNode, true);
        setFailure(
            getClass().getSimpleName(),
            new RetriesExhaustedException("Max attempts " + env.getAssignmentManager().getAssignMaxAttempts() + " exceeded"));
        regionNode.unsetProcedure(this);
        return Flow.NO_MORE_STATE;
    }

    env.getAssignmentManager().regionFailedOpen(regionNode, false);
    // we failed to assign the region, force a new plan
    forceNewPlan = true;
    regionNode.setRegionLocation(null);
    setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);

    if (failedOpens <= env.getAssignmentManager().getAssignRetryImmediatelyMaxAttempts()) {
        // Here we do not throw exception because we want to the region to be online ASAP
        return Flow.HAS_MORE_STATE;
    }
    // Throw exception to backoff and retry when failed open too many times
    throw new HBaseIOException("Failed confirm OPEN of " + regionNode + " (remote log may yield more detail on why).");
}
Also used : RetriesExhaustedException(org.apache.hadoop.hbase.client.RetriesExhaustedException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException)

Aggregations

HBaseIOException (org.apache.hadoop.hbase.HBaseIOException)36 IOException (java.io.IOException)19 ServerName (org.apache.hadoop.hbase.ServerName)17 ArrayList (java.util.ArrayList)13 RegionInfo (org.apache.hadoop.hbase.client.RegionInfo)13 List (java.util.List)8 HashMap (java.util.HashMap)7 InterruptedIOException (java.io.InterruptedIOException)5 Map (java.util.Map)5 DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException)5 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)5 Test (org.junit.Test)5 TreeMap (java.util.TreeMap)4 Configuration (org.apache.hadoop.conf.Configuration)4 NonNull (edu.umd.cs.findbugs.annotations.NonNull)3 ExecutionException (java.util.concurrent.ExecutionException)3 RegionLocations (org.apache.hadoop.hbase.RegionLocations)3 TableName (org.apache.hadoop.hbase.TableName)3 FavoredNodeAssignmentHelper (org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper)3 RSGroupAdminEndpoint (org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint)3