Search in sources :

Example 1 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class AssignmentManager method onRegionFailedOpen.

/**
 * Handles a report that opening region {@code hri} on {@code serverName} failed.
 *
 * <p>Flow visible in this listing: validate the current state, bump the per-region
 * counter kept in {@code failedOpenTracker}, then either mark the region
 * {@code FAILED_OPEN} (counter reached {@code maximumAttempts} and the region is not
 * meta) or mark it {@code CLOSED} and schedule another assign after a backoff delay.
 *
 * <p>NOTE(review): this listing contains no closing braces, so the block nesting
 * described in the comments below is read from indentation only; confirm against the
 * original source before relying on it.
 *
 * @param current the region's current state; may be null
 * @param hri the region whose open failed
 * @param serverName the server that reported the failure
 * @return an error message when {@code current} is null or
 *         {@code current.isOpeningOrFailedOpenOnServer(serverName)} is false;
 *         otherwise null ("Null means no error")
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "AT_OPERATION_SEQUENCE_ON_CONCURRENT_ABSTRACTION", justification = "Worth fixing but not the end of the world.")
private String onRegionFailedOpen(final RegionState current, final HRegionInfo hri, final ServerName serverName) {
    // it could be a reportRegionTransition RPC retry.
    if (current == null || !current.isOpeningOrFailedOpenOnServer(serverName)) {
        return hri.getShortNameToLog() + " is not opening on " + serverName;
    // Just return in case of retrying
    if (current.isFailedOpen()) {
        return null;
    String encodedName = hri.getEncodedName();
    // Look up (or lazily create) the failed-open counter for this region.
    AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
    if (failedOpenCount == null) {
        failedOpenCount = new AtomicInteger();
        // No need to use putIfAbsent, or extra synchronization since
        // this whole handleRegion block is locked on the encoded region
        // name, and failedOpenTracker is updated only in this block
        failedOpenTracker.put(encodedName, failedOpenCount);
    // The counter is incremented on every call that reaches this point; the meta
    // region is excluded from the give-up branch and always takes the else branch.
    if (failedOpenCount.incrementAndGet() >= maximumAttempts && !hri.isMetaRegion()) {
        regionStates.updateRegionState(hri, State.FAILED_OPEN);
        // remove the tracking info to save memory, also reset
        // the count for next open initiative
        // NOTE(review): no statement removing the entry from failedOpenTracker is
        // visible after the comment above; it may have been lost from this listing.
    } else {
        if (hri.isMetaRegion() && failedOpenCount.get() >= maximumAttempts) {
            // Log a warning message if a meta region failedOpenCount exceeds maximumAttempts
            // so that we are aware of potential problem if it persists for a long time.
            // NOTE(review): " after" is concatenated directly with the count, so the
            // message renders as "... after3 retries"; a space is likely intended.
            LOG.warn("Failed to open the hbase:meta region " + hri.getRegionNameAsString() + " after" + failedOpenCount.get() + " retries. Continue retrying.");
        // Handle this the same as if it were opened and then closed.
        RegionState regionState = regionStates.updateRegionState(hri, State.CLOSED);
        if (regionState != null) {
            // NOTE(review): the comment below starts mid-sentence; its beginning appears
            // to be missing from this listing.
            // destination and the same is updated in the region plan. (HBASE-5546)
            // Nothing is re-assigned when the table is DISABLED/DISABLING or the region
            // is listed in replicasToClose.
            if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING) || replicasToClose.contains(hri)) {
                return null;
            // This sets CLOSED a second time (the state was already set just above).
            regionStates.updateRegionState(hri, RegionState.State.CLOSED);
            // This below has to do w/ online enable/disable of a table
            try {
                getRegionPlan(hri, true);
            } catch (HBaseIOException e) {
                // Failure to obtain a plan is only logged; the retry below still happens.
                LOG.warn("Failed to get region plan", e);
            // Have the current thread sleep a bit before resubmitting the RPC request
            // The delay comes from backoffPolicy, fed with the current failure count.
            long sleepTime = backoffPolicy.getBackoffTime(retryConfig, failedOpenTracker.get(encodedName).get());
            invokeAssignLater(hri, sleepTime);
    // Null means no error
    return null;
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException)

Example 2 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class FavoredNodeAssignmentHelper method generateFavoredNodes.

  /**
   * Generate favored nodes for a region.
   * Choose a random server as primary, then choose the secondary and tertiary favored nodes (FN)
   * so that they are spread across two racks.
   */
List<ServerName> generateFavoredNodes(HRegionInfo hri) throws IOException {
    // Builds the favored-node list for hri: one randomly picked primary from `servers`,
    // followed by the two servers returned by placeSecondaryAndTertiaryRS.
    // Returns the three-entry list, or throws HBaseIOException when exactly two
    // secondary/tertiary servers could not be obtained.
    // NOTE(review): this listing has no closing braces; nesting is read from indentation.
    List<ServerName> favoredNodesForRegion = new ArrayList<>(FAVORED_NODES_NUM);
    // NOTE(review): presumably `random` is a java.util.Random, in which case an empty
    // `servers` list would make nextInt(0) throw IllegalArgumentException; confirm.
    ServerName primary = servers.get(random.nextInt(servers.size()));
    // Entries are re-created from host:port with NON_STARTCODE instead of reusing `primary`.
    favoredNodesForRegion.add(ServerName.valueOf(primary.getHostAndPort(), ServerName.NON_STARTCODE));
    // Single-entry map: placeSecondaryAndTertiaryRS takes a region -> primary mapping.
    Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<>(1);
    primaryRSMap.put(hri, primary);
    Map<HRegionInfo, ServerName[]> secondaryAndTertiaryRSMap = placeSecondaryAndTertiaryRS(primaryRSMap);
    ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(hri);
    // Only a result with exactly two servers is accepted.
    if (secondaryAndTertiaryNodes != null && secondaryAndTertiaryNodes.length == 2) {
        for (ServerName sn : secondaryAndTertiaryNodes) {
            favoredNodesForRegion.add(ServerName.valueOf(sn.getHostAndPort(), ServerName.NON_STARTCODE));
        return favoredNodesForRegion;
    } else {
        throw new HBaseIOException("Unable to generate secondary and tertiary favored nodes.");
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HashMap(java.util.HashMap) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) ServerName(org.apache.hadoop.hbase.ServerName) ArrayList(java.util.ArrayList)

Example 3 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class RSRpcServices method doNonAtomicRegionMutation.

/**
 * Run through the region actions <code>actions</code> and per Mutation, do the work, and then
 * when done, add an instance of a {@link ResultOrException} that corresponds to each Mutation.
 * @param cellsToReturn  Could be null. May be allocated in this method.  This is what this
 * method returns as a 'result'.
 * @param closeCallBack the callback to be used with multigets
 * @param context the current RpcCallContext
 * @return the <code>cellsToReturn</code> list passed in, or one allocated in this method
 */
private List<CellScannable> doNonAtomicRegionMutation(final HRegion region, final OperationQuota quota, final RegionAction actions, final CellScanner cellScanner, final RegionActionResult.Builder builder, List<CellScannable> cellsToReturn, long nonceGroup, final RegionScannersCloseCallBack closeCallBack, RpcCallContext context, ActivePolicyEnforcement spaceQuotaEnforcement) {
    // Iterates over actions.getActionList() and handles each action by kind (Get,
    // service call, or mutation). An IOException raised while handling an action is
    // caught per action. Returns cellsToReturn.
    // NOTE(review): this listing is lossy: there are no closing braces, several locals
    // are assigned but never used afterwards (pair, serviceResultBuilder, pbResult,
    // resultOrExceptionBuilder), and some statements are visibly truncated. The comments
    // below describe only what is shown; confirm against the original source.

    // Gather up CONTIGUOUS Puts and Deletes in this mutations List.  Idea is that rather than do
    // one at a time, we instead pass them in batch.  Be aware that the corresponding
    // ResultOrException instance that matches each Put or Delete is then added down in the
    // doNonAtomicBatchOp call.  We should be staying aligned though the Put and Delete are
    // deferred/batched
    List<ClientProtos.Action> mutations = null;
    // Response-size ceiling: the smaller of the scanner limit and the remaining read quota.
    long maxQuotaResultSize = Math.min(maxScannerResultSize, quota.getReadAvailable());
    // Created at most once (see the null check below) and reused for later actions.
    IOException sizeIOE = null;
    Object lastBlock = null;
    ClientProtos.ResultOrException.Builder resultOrExceptionBuilder = ResultOrException.newBuilder();
    boolean hasResultOrException = false;
    for (ClientProtos.Action action : actions.getActionList()) {
        hasResultOrException = false;
        try {
            Result r = null;
            // Size guard: applies only when the call context supports immediate retry and
            // either the cell size, or block size plus exception size, exceeds the ceiling.
            if (context != null && context.isRetryImmediatelySupported() && (context.getResponseCellSize() > maxQuotaResultSize || context.getResponseBlockSize() + context.getResponseExceptionSize() > maxQuotaResultSize)) {
                // change after the response size limit is reached.
                if (sizeIOE == null) {
                    // We don't need the stack un-winding do don't throw the exception.
                    // Throwing will kill the JVM's JIT.
                    // Instead just create the exception and then store it.
                    sizeIOE = new MultiActionResultTooLarge("Max size exceeded" + " CellSize: " + context.getResponseCellSize() + " BlockSize: " + context.getResponseBlockSize());
                    // Only report the exception once since there's only one request that
                    // caused the exception. Otherwise this number will dominate the exceptions count.
                // Now that there's an exception is known to be created
                // use it for the response.
                // This will create a copy in the builder.
                // NOTE(review): `pair` is not used in the visible lines, and nothing shown
                // skips the rest of the loop body for this action; lines appear missing.
                NameBytesPair pair = ResponseConverter.buildException(sizeIOE);
                skipCellsForMutation(action, cellScanner);
            if (action.hasGet()) {
                // Start time for the Get latency metric updated in the finally block.
                long before = EnvironmentEdgeManager.currentTime();
                ClientProtos.Get pbGet = action.getGet();
                // they are; its a problem for non-native clients like asynchbase. HBASE-20225.
                if (pbGet.hasClosestRowBefore() && pbGet.getClosestRowBefore()) {
                    throw new UnknownProtocolException("Is this a pre-hbase-1.0.0 or asynchbase client? " + "Client is invoking getClosestRowBefore removed in hbase-2.0.0 replaced by " + "reverse Scan.");
                try {
                    Get get = ProtobufUtil.toGet(pbGet);
                    // With a call context the local get(...) overload taking the close
                    // callback is used; without one the region is read directly.
                    if (context != null) {
                        r = get(get, (region), closeCallBack, context);
                    } else {
                        r = region.get(get);
                } finally {
                    // Metrics are recorded whether or not the Get succeeded.
                    final MetricsRegionServer metricsRegionServer = server.getMetrics();
                    if (metricsRegionServer != null) {
                        metricsRegionServer.updateGet(region.getTableDescriptor().getTableName(), EnvironmentEdgeManager.currentTime() - before);
            } else if (action.hasServiceCall()) {
                hasResultOrException = true;
                Message result = execServiceOnRegion(region, action.getServiceCall());
                // NOTE(review): neither `result` nor `serviceResultBuilder` is used in the
                // visible lines; the code that populates the response appears to be missing.
                ClientProtos.CoprocessorServiceResult.Builder serviceResultBuilder = ClientProtos.CoprocessorServiceResult.newBuilder();
            } else if (action.hasMutation()) {
                MutationType type = action.getMutation().getMutateType();
                // A mutation that is neither PUT nor DELETE ends the current contiguous run.
                if (type != MutationType.PUT && type != MutationType.DELETE && mutations != null && !mutations.isEmpty()) {
                    // Flush out any Puts or Deletes already collected.
                    doNonAtomicBatchOp(builder, region, quota, mutations, cellScanner, spaceQuotaEnforcement);
                // NOTE(review): as shown, the cases below have no `break`, the PUT/DELETE
                // case never adds `action` to `mutations`, there is no `default` label, and
                // the final `throw` is syntactically incomplete. Do not treat this switch as
                // the real control flow.
                switch(type) {
                    case APPEND:
                        r = append(region, quota, action.getMutation(), cellScanner, nonceGroup, spaceQuotaEnforcement);
                    case INCREMENT:
                        r = increment(region, quota, action.getMutation(), cellScanner, nonceGroup, spaceQuotaEnforcement);
                    case PUT:
                    case DELETE:
                        // Collect the individual mutations and apply in a batch
                        if (mutations == null) {
                            mutations = new ArrayList<>(actions.getActionCount());
                        throw new DoNotRetryIOException("Unsupported mutate type: " +;
            } else {
                throw new HBaseIOException("Unexpected Action type");
            if (r != null) {
                ClientProtos.Result pbResult = null;
                // Two conversions: toResultNoData when isClientCellBlockSupport(context) is
                // true (cellsToReturn is allocated on first use), otherwise toResult.
                if (isClientCellBlockSupport(context)) {
                    pbResult = ProtobufUtil.toResultNoData(r);
                    // Hard to guess the size here.  Just make a rough guess.
                    if (cellsToReturn == null) {
                        cellsToReturn = new ArrayList<>();
                } else {
                    pbResult = ProtobufUtil.toResult(r);
                lastBlock = addSize(context, r, lastBlock);
                hasResultOrException = true;
        // Could get to here and there was no result and no exception.  Presumes we added
        // a Put or Delete to the collecting Mutations List for adding later.  In this
        // case the corresponding ResultOrException instance for the Put or Delete will be added
        // down in the doNonAtomicBatchOp method call rather than up here.
        } catch (IOException ie) {
            // A per-action failure is converted to an exception pair, not rethrown.
            hasResultOrException = true;
            NameBytesPair pair = ResponseConverter.buildException(ie);
        if (hasResultOrException) {
            // Propagate index.
    // Finish up any outstanding mutations
    if (!CollectionUtils.isEmpty(mutations)) {
        doNonAtomicBatchOp(builder, region, quota, mutations, cellScanner, spaceQuotaEnforcement);
    return cellsToReturn;
Also used : MultiActionResultTooLarge(org.apache.hadoop.hbase.MultiActionResultTooLarge) RegionAction(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.RegionAction) Action(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.Action) MutationType(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.MutationProto.MutationType) Message( DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) CacheBuilder( CacheEvictionStatsBuilder(org.apache.hadoop.hbase.CacheEvictionStatsBuilder) Action(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.Action) IOException( DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) UncheckedIOException( RegionActionResult(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.RegionActionResult) Result(org.apache.hadoop.hbase.client.Result) CheckAndMutateResult(org.apache.hadoop.hbase.client.CheckAndMutateResult) NameBytesPair(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.NameBytesPair) Get(org.apache.hadoop.hbase.client.Get) MutableObject(org.apache.commons.lang3.mutable.MutableObject) UnknownProtocolException(org.apache.hadoop.hbase.exceptions.UnknownProtocolException) ResultOrException(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ResultOrException) ClientProtos(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos)

Example 4 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class BaseLoadBalancer method createCluster.

/**
 * Builds a {@code BalancerClusterState} for the given servers and regions.
 *
 * <p>NOTE(review): this listing has no closing braces; nesting is read from indentation.
 *
 * @param servers servers that must each have an entry in the assignment snapshot
 * @param regions regions to build the cluster state for
 * @return a new BalancerClusterState over {@code regions} and the assignment snapshot
 * @throws HBaseIOException wrapping any IOException thrown by
 *         {@code provider.hasRegionReplica(regions)}
 */
private BalancerClusterState createCluster(List<ServerName> servers, Collection<RegionInfo> regions) throws HBaseIOException {
    // Stays false when no provider is set.
    boolean hasRegionReplica = false;
    try {
        if (provider != null) {
            hasRegionReplica = provider.hasRegionReplica(regions);
    } catch (IOException ioe) {
        // Re-thrown as the declared exception type, keeping the original as the cause.
        throw new HBaseIOException(ioe);
    // Get the snapshot of the current assignments for the regions in question, and then create
    // a cluster out of it. Note that we might have replicas already assigned to some servers
    // earlier. So we want to get the snapshot to see those assignments, but this will only contain
    // replicas of the regions that are passed (for performance).
    Map<ServerName, List<RegionInfo>> clusterState = null;
    if (!hasRegionReplica) {
        clusterState = getRegionAssignmentsByServer(regions);
    } else {
        // for the case where we have region replica it is better we get the entire cluster's snapshot
        clusterState = getRegionAssignmentsByServer(null);
    // Make sure every requested server appears in the snapshot, even with no regions.
    // Collections.emptyList() is immutable, so these placeholder lists cannot be added to.
    for (ServerName server : servers) {
        if (!clusterState.containsKey(server)) {
            clusterState.put(server, Collections.emptyList());
    return new BalancerClusterState(regions, clusterState, null, this.regionFinder, rackManager);
Also used : HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) ServerName(org.apache.hadoop.hbase.ServerName) ArrayList(java.util.ArrayList) List(java.util.List) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException)

Example 5 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class FavoredStochasticBalancer method roundRobinAssignment.

/**
 * Round robin assignment: Segregate the regions into two types:
 * 1. The regions that have favored node assignment where at least one of the favored nodes
 * is still alive. In this case, try to adhere to the current favored nodes assignment as
 * much as possible - i.e., if the current primary is gone, then make the secondary or
 * tertiary the new host for the region (based on their current load). Note that we don't
 * change the favored node assignments here (even though one or more favored nodes are
 * currently down). That will be done by the admin operations.
 * 2. The regions that currently don't have favored node assignments. Generate favored nodes
 * for them and then assign. Generate the primary FN in round robin fashion and generate
 * secondary and tertiary as per favored nodes constraints.
 */
public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions, List<ServerName> servers) throws HBaseIOException {
    // Produces a server -> regions assignment by merging three partial maps: system
    // regions (delegated to super.roundRobinAssignment), regions that already have
    // favored nodes, and regions for which favored nodes are generated here.
    // Returns an empty map when `regions` is empty. Any exception raised while building
    // the maps is wrapped in HBaseIOException.
    // NOTE(review): this listing has no closing braces; nesting is read from indentation.
    Map<ServerName, List<RegionInfo>> assignmentMap = new HashMap<>();
    if (regions.isEmpty()) {
        return assignmentMap;
    // The Set copy removes duplicate entries from `regions`.
    Set<RegionInfo> regionSet = new HashSet<>(regions);
    try {
        FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
        // NOTE(review): presumably filterNonFNApplicableRegions also removes the returned
        // regions from regionSet, since regionSet is reused below; not visible here, confirm.
        Set<RegionInfo> systemRegions = FavoredNodesManager.filterNonFNApplicableRegions(regionSet);
        // Assign all system regions
        Map<ServerName, List<RegionInfo>> systemAssignments = super.roundRobinAssignment(Lists.newArrayList(systemRegions), servers);
        // Segregate favored and non-favored nodes regions and assign accordingly.
        Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>> segregatedRegions = segregateRegionsAndAssignRegionsWithFavoredNodes(regionSet, servers);
        // First element: assignments for regions with favored nodes.
        // Second element: the regions still lacking favored nodes.
        Map<ServerName, List<RegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
        Map<ServerName, List<RegionInfo>> regionsWithoutFN = generateFNForRegionsWithoutFN(helper, segregatedRegions.getSecond());
        // merge the assignment maps
        mergeAssignmentMaps(assignmentMap, systemAssignments);
        mergeAssignmentMaps(assignmentMap, regionsWithFavoredNodesMap);
        mergeAssignmentMaps(assignmentMap, regionsWithoutFN);
    } catch (Exception ex) {
        // NOTE(review): the message says "Falling back to regular assignment", but the
        // visible code throws instead; any fallback would have to happen in the caller.
        throw new HBaseIOException("Encountered exception while doing favored-nodes assignment " + ex + " Falling back to regular assignment", ex);
    return assignmentMap;
Also used : HashMap(java.util.HashMap) FavoredNodeAssignmentHelper(org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) ServerName(org.apache.hadoop.hbase.ServerName) ArrayList(java.util.ArrayList) List(java.util.List) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) HashMap(java.util.HashMap) Map(java.util.Map) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) HashSet(java.util.HashSet) NonNull(edu.umd.cs.findbugs.annotations.NonNull)


HBaseIOException (org.apache.hadoop.hbase.HBaseIOException)36 IOException ( ServerName (org.apache.hadoop.hbase.ServerName)17 ArrayList (java.util.ArrayList)13 RegionInfo (org.apache.hadoop.hbase.client.RegionInfo)13 List (java.util.List)8 HashMap (java.util.HashMap)7 InterruptedIOException ( Map (java.util.Map)5 DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException)5 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)5 Test (org.junit.Test)5 TreeMap (java.util.TreeMap)4 Configuration (org.apache.hadoop.conf.Configuration)4 NonNull (edu.umd.cs.findbugs.annotations.NonNull)3 ExecutionException (java.util.concurrent.ExecutionException)3 RegionLocations (org.apache.hadoop.hbase.RegionLocations)3 TableName (org.apache.hadoop.hbase.TableName)3 FavoredNodeAssignmentHelper (org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper)3 RSGroupAdminEndpoint (org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint)3