Example 1 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class AssignmentManager method onRegionFailedOpen.

@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "AT_OPERATION_SEQUENCE_ON_CONCURRENT_ABSTRACTION", justification = "Worth fixing but not the end of the world.")
private String onRegionFailedOpen(final RegionState current, final HRegionInfo hri, final ServerName serverName) {
    // it could be a reportRegionTransition RPC retry.
    if (current == null || !current.isOpeningOrFailedOpenOnServer(serverName)) {
        return hri.getShortNameToLog() + " is not opening on " + serverName;
    }
    // Just return in case of retrying
    if (current.isFailedOpen()) {
        return null;
    }
    String encodedName = hri.getEncodedName();
    // FindBugs: AT_OPERATION_SEQUENCE_ON_CONCURRENT_ABSTRACTION Worth fixing!!!
    AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
    if (failedOpenCount == null) {
        failedOpenCount = new AtomicInteger();
        // No need to use putIfAbsent, or extra synchronization since
        // this whole handleRegion block is locked on the encoded region
        // name, and failedOpenTracker is updated only in this block
        failedOpenTracker.put(encodedName, failedOpenCount);
    }
    if (failedOpenCount.incrementAndGet() >= maximumAttempts && !hri.isMetaRegion()) {
        regionStates.updateRegionState(hri, State.FAILED_OPEN);
        // remove the tracking info to save memory, also reset
        // the count for next open initiative
        failedOpenTracker.remove(encodedName);
    } else {
        if (hri.isMetaRegion() && failedOpenCount.get() >= maximumAttempts) {
            // Log a warning message if a meta region failedOpenCount exceeds maximumAttempts
            // so that we are aware of potential problem if it persists for a long time.
            LOG.warn("Failed to open the hbase:meta region " + hri.getRegionNameAsString() + " after" + failedOpenCount.get() + " retries. Continue retrying.");
        }
        // Handle this the same as if it were opened and then closed.
        RegionState regionState = regionStates.updateRegionState(hri, State.CLOSED);
        if (regionState != null) {
            // When there is more than one region server, a new RS is selected as the
            // destination and the same is updated in the region plan. (HBASE-5546)
            if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING) || replicasToClose.contains(hri)) {
                offlineDisabledRegion(hri);
                return null;
            }
            regionStates.updateRegionState(hri, RegionState.State.CLOSED);
            // This below has to do w/ online enable/disable of a table
            removeClosedRegion(hri);
            try {
                getRegionPlan(hri, true);
            } catch (HBaseIOException e) {
                LOG.warn("Failed to get region plan", e);
            }
            // Have the current thread sleep a bit before resubmitting the RPC request
            long sleepTime = backoffPolicy.getBackoffTime(retryConfig, failedOpenTracker.get(encodedName).get());
            invokeAssignLater(hri, sleepTime);
        }
    }
    // Null means no error
    return null;
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException)
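
The bookkeeping above is easy to lift out: a per-region AtomicInteger in a shared map, removed once maximumAttempts is reached so the next open attempt starts from zero. Below is a minimal standalone sketch of that pattern plus a simple exponential backoff (the meta-region special case is omitted). FailedOpenTracker, onFailedOpen, and the backoff formula are hypothetical names, not HBase API; computeIfAbsent replaces the plain get/put, which the HBase code can only skip because its surrounding block is locked on the encoded region name.

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;

public class FailedOpenTracker {
    private final ConcurrentMap<String, AtomicInteger> failedOpenCounts = new ConcurrentHashMap<>();
    private final int maximumAttempts;
    private final long baseBackoffMillis;

    public FailedOpenTracker(int maximumAttempts, long baseBackoffMillis) {
        this.maximumAttempts = maximumAttempts;
        this.baseBackoffMillis = baseBackoffMillis;
    }

    /**
     * Records one failed open. Returns -1 when the region should be marked
     * FAILED_OPEN (attempts exhausted), otherwise the backoff in millis
     * before the next assign attempt.
     */
    public long onFailedOpen(String encodedRegionName) {
        AtomicInteger count =
            failedOpenCounts.computeIfAbsent(encodedRegionName, k -> new AtomicInteger());
        int attempts = count.incrementAndGet();
        if (attempts >= maximumAttempts) {
            // Drop the tracking entry so a future open starts from zero,
            // mirroring failedOpenTracker.remove(encodedName) above.
            failedOpenCounts.remove(encodedRegionName);
            return -1L;
        }
        // Simple exponential backoff; HBase delegates this to a pluggable policy.
        return baseBackoffMillis * (1L << Math.min(attempts, 10));
    }

    public static void main(String[] args) {
        FailedOpenTracker tracker = new FailedOpenTracker(4, 100);
        for (int i = 0; i < 5; i++) {
            // Prints growing backoffs, then -1 once attempts are exhausted,
            // then starts over because the entry was removed.
            System.out.println("attempt " + (i + 1) + " -> backoff " +
                tracker.onFailedOpen("abc123") + " ms");
        }
    }
}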

Example 2 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class FavoredNodeAssignmentHelper method generateFavoredNodes.

/*
   * Generate favored nodes for a region.
   *
   * Choose a random server as the primary, then choose the secondary and tertiary FNs so
   * they are spread across two racks.
   */
List<ServerName> generateFavoredNodes(HRegionInfo hri) throws IOException {
    List<ServerName> favoredNodesForRegion = new ArrayList<>(FAVORED_NODES_NUM);
    ServerName primary = servers.get(random.nextInt(servers.size()));
    favoredNodesForRegion.add(ServerName.valueOf(primary.getHostAndPort(), ServerName.NON_STARTCODE));
    Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<>(1);
    primaryRSMap.put(hri, primary);
    Map<HRegionInfo, ServerName[]> secondaryAndTertiaryRSMap = placeSecondaryAndTertiaryRS(primaryRSMap);
    ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(hri);
    if (secondaryAndTertiaryNodes != null && secondaryAndTertiaryNodes.length == 2) {
        for (ServerName sn : secondaryAndTertiaryNodes) {
            favoredNodesForRegion.add(ServerName.valueOf(sn.getHostAndPort(), ServerName.NON_STARTCODE));
        }
        return favoredNodesForRegion;
    } else {
        throw new HBaseIOException("Unable to generate secondary and tertiary favored nodes.");
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HashMap(java.util.HashMap) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) ServerName(org.apache.hadoop.hbase.ServerName) ArrayList(java.util.ArrayList)
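
The comment on generateFavoredNodes promises three favored nodes spread across two racks, but the rack logic itself lives in placeSecondaryAndTertiaryRS, which is not shown. The sketch below illustrates one plausible reading of that constraint under stated assumptions: primary on a random server, secondary and tertiary together on a different rack. RackLookup stands in for HBase's RackManager, plain host strings stand in for ServerName, and the real helper's placement logic is more involved.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.stream.Collectors;

public class FavoredNodesSketch {
    interface RackLookup { String rackOf(String host); }

    static List<String> generateFavoredNodes(List<String> servers, RackLookup racks, Random random) {
        // Primary: any random live server, as in the HBase helper above.
        String primary = servers.get(random.nextInt(servers.size()));
        String primaryRack = racks.rackOf(primary);
        // Group the remaining servers by rack, excluding the primary's rack.
        Map<String, List<String>> byRack = servers.stream()
            .filter(s -> !racks.rackOf(s).equals(primaryRack))
            .collect(Collectors.groupingBy(racks::rackOf));
        // Secondary and tertiary share one rack different from the primary's,
        // so the three favored nodes span exactly two racks.
        List<String> candidates = byRack.values().stream()
            .filter(hosts -> hosts.size() >= 2)
            .findFirst()
            .orElseThrow(() -> new IllegalStateException(
                "Unable to generate secondary and tertiary favored nodes."));
        List<String> shuffled = new ArrayList<>(candidates);
        Collections.shuffle(shuffled, random);
        List<String> favored = new ArrayList<>(3);
        favored.add(primary);
        favored.add(shuffled.get(0));
        favored.add(shuffled.get(1));
        return favored;
    }

    public static void main(String[] args) {
        Map<String, String> rackMap = Map.of(
            "rs1", "rackA", "rs2", "rackA", "rs3", "rackB", "rs4", "rackB");
        List<String> servers = new ArrayList<>(rackMap.keySet());
        System.out.println(generateFavoredNodes(servers, rackMap::get, new Random(42)));
    }
}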

Example 3 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class RSRpcServices method doNonAtomicRegionMutation.

/**
 * Run through the RegionAction <code>actions</code> and, per Mutation, do the work; when
 * done, add an instance of a {@link ResultOrException} that corresponds to each Mutation.
 * @param cellsToReturn Could be null. May be allocated in this method. This is what this
 * method returns as its result.
 * @param closeCallBack the callback to be used with multigets
 * @param context the current RpcCallContext
 * @return the <code>cellsToReturn</code> list, allocated here if needed
 */
private List<CellScannable> doNonAtomicRegionMutation(final HRegion region, final OperationQuota quota, final RegionAction actions, final CellScanner cellScanner, final RegionActionResult.Builder builder, List<CellScannable> cellsToReturn, long nonceGroup, final RegionScannersCloseCallBack closeCallBack, RpcCallContext context, ActivePolicyEnforcement spaceQuotaEnforcement) {
    // Gather up CONTIGUOUS Puts and Deletes in this mutations List.  Idea is that rather than do
    // one at a time, we instead pass them in batch.  Be aware that the corresponding
    // ResultOrException instance that matches each Put or Delete is then added down in the
    // doNonAtomicBatchOp call.  We should be staying aligned though the Put and Delete are
    // deferred/batched
    List<ClientProtos.Action> mutations = null;
    long maxQuotaResultSize = Math.min(maxScannerResultSize, quota.getReadAvailable());
    IOException sizeIOE = null;
    Object lastBlock = null;
    ClientProtos.ResultOrException.Builder resultOrExceptionBuilder = ResultOrException.newBuilder();
    boolean hasResultOrException = false;
    for (ClientProtos.Action action : actions.getActionList()) {
        hasResultOrException = false;
        resultOrExceptionBuilder.clear();
        try {
            Result r = null;
            if (context != null && context.isRetryImmediatelySupported() && (context.getResponseCellSize() > maxQuotaResultSize || context.getResponseBlockSize() + context.getResponseExceptionSize() > maxQuotaResultSize)) {
                // We're storing the exception since the exception and reason string won't
                // change after the response size limit is reached.
                if (sizeIOE == null) {
                    // We don't need the stack un-winding so don't throw the exception.
                    // Throwing will kill the JVM's JIT.
                    // 
                    // Instead just create the exception and then store it.
                    sizeIOE = new MultiActionResultTooLarge("Max size exceeded" + " CellSize: " + context.getResponseCellSize() + " BlockSize: " + context.getResponseBlockSize());
                    // Only report the exception once since there's only one request that
                    // caused the exception. Otherwise this number will dominate the exceptions count.
                    rpcServer.getMetrics().exception(sizeIOE);
                }
                // Now that the exception is known to be created,
                // use it for the response.
                // 
                // This will create a copy in the builder.
                NameBytesPair pair = ResponseConverter.buildException(sizeIOE);
                resultOrExceptionBuilder.setException(pair);
                context.incrementResponseExceptionSize(pair.getSerializedSize());
                resultOrExceptionBuilder.setIndex(action.getIndex());
                builder.addResultOrException(resultOrExceptionBuilder.build());
                skipCellsForMutation(action, cellScanner);
                continue;
            }
            if (action.hasGet()) {
                long before = EnvironmentEdgeManager.currentTime();
                ClientProtos.Get pbGet = action.getGet();
                // getClosestRowBefore was removed in hbase-2.0.0 and replaced by a reverse Scan,
                // but older clients may still send it; it's a problem for non-native clients
                // like asynchbase. HBASE-20225.
                if (pbGet.hasClosestRowBefore() && pbGet.getClosestRowBefore()) {
                    throw new UnknownProtocolException("Is this a pre-hbase-1.0.0 or asynchbase client? " + "Client is invoking getClosestRowBefore removed in hbase-2.0.0 replaced by " + "reverse Scan.");
                }
                try {
                    Get get = ProtobufUtil.toGet(pbGet);
                    if (context != null) {
                        r = get(get, (region), closeCallBack, context);
                    } else {
                        r = region.get(get);
                    }
                } finally {
                    final MetricsRegionServer metricsRegionServer = server.getMetrics();
                    if (metricsRegionServer != null) {
                        metricsRegionServer.updateGet(region.getTableDescriptor().getTableName(), EnvironmentEdgeManager.currentTime() - before);
                    }
                }
            } else if (action.hasServiceCall()) {
                hasResultOrException = true;
                Message result = execServiceOnRegion(region, action.getServiceCall());
                ClientProtos.CoprocessorServiceResult.Builder serviceResultBuilder = ClientProtos.CoprocessorServiceResult.newBuilder();
                resultOrExceptionBuilder.setServiceResult(serviceResultBuilder.setValue(serviceResultBuilder.getValueBuilder().setName(result.getClass().getName()).setValue(UnsafeByteOperations.unsafeWrap(result.toByteArray()))));
            } else if (action.hasMutation()) {
                MutationType type = action.getMutation().getMutateType();
                if (type != MutationType.PUT && type != MutationType.DELETE && mutations != null && !mutations.isEmpty()) {
                    // Flush out any Puts or Deletes already collected.
                    doNonAtomicBatchOp(builder, region, quota, mutations, cellScanner, spaceQuotaEnforcement);
                    mutations.clear();
                }
                switch(type) {
                    case APPEND:
                        r = append(region, quota, action.getMutation(), cellScanner, nonceGroup, spaceQuotaEnforcement);
                        break;
                    case INCREMENT:
                        r = increment(region, quota, action.getMutation(), cellScanner, nonceGroup, spaceQuotaEnforcement);
                        break;
                    case PUT:
                    case DELETE:
                        // Collect the individual mutations and apply in a batch
                        if (mutations == null) {
                            mutations = new ArrayList<>(actions.getActionCount());
                        }
                        mutations.add(action);
                        break;
                    default:
                        throw new DoNotRetryIOException("Unsupported mutate type: " + type.name());
                }
            } else {
                throw new HBaseIOException("Unexpected Action type");
            }
            if (r != null) {
                ClientProtos.Result pbResult = null;
                if (isClientCellBlockSupport(context)) {
                    pbResult = ProtobufUtil.toResultNoData(r);
                    // Hard to guess the size here.  Just make a rough guess.
                    if (cellsToReturn == null) {
                        cellsToReturn = new ArrayList<>();
                    }
                    cellsToReturn.add(r);
                } else {
                    pbResult = ProtobufUtil.toResult(r);
                }
                lastBlock = addSize(context, r, lastBlock);
                hasResultOrException = true;
                resultOrExceptionBuilder.setResult(pbResult);
            }
        // Could get to here and there was no result and no exception.  Presumes we added
        // a Put or Delete to the collecting Mutations List for adding later.  In this
        // case the corresponding ResultOrException instance for the Put or Delete will be added
        // down in the doNonAtomicBatchOp method call rather than up here.
        } catch (IOException ie) {
            rpcServer.getMetrics().exception(ie);
            hasResultOrException = true;
            NameBytesPair pair = ResponseConverter.buildException(ie);
            resultOrExceptionBuilder.setException(pair);
            context.incrementResponseExceptionSize(pair.getSerializedSize());
        }
        if (hasResultOrException) {
            // Propagate index.
            resultOrExceptionBuilder.setIndex(action.getIndex());
            builder.addResultOrException(resultOrExceptionBuilder.build());
        }
    }
    // Finish up any outstanding mutations
    if (!CollectionUtils.isEmpty(mutations)) {
        doNonAtomicBatchOp(builder, region, quota, mutations, cellScanner, spaceQuotaEnforcement);
    }
    return cellsToReturn;
}
Also used : MultiActionResultTooLarge(org.apache.hadoop.hbase.MultiActionResultTooLarge) RegionAction(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.RegionAction) Action(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.Action) MutationType(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.MutationProto.MutationType) Message(org.apache.hbase.thirdparty.com.google.protobuf.Message) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) CacheBuilder(org.apache.hbase.thirdparty.com.google.common.cache.CacheBuilder) CacheEvictionStatsBuilder(org.apache.hadoop.hbase.CacheEvictionStatsBuilder) Action(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.Action) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) UncheckedIOException(java.io.UncheckedIOException) RegionActionResult(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.RegionActionResult) Result(org.apache.hadoop.hbase.client.Result) CheckAndMutateResult(org.apache.hadoop.hbase.client.CheckAndMutateResult) NameBytesPair(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.NameBytesPair) Get(org.apache.hadoop.hbase.client.Get) MutableObject(org.apache.commons.lang3.mutable.MutableObject) UnknownProtocolException(org.apache.hadoop.hbase.exceptions.UnknownProtocolException) ResultOrException(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ResultOrException) ClientProtos(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos)
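
A detail worth calling out in Example 3 is the size-limit branch: once the response passes maxQuotaResultSize, the method builds one MultiActionResultTooLarge, reports it to metrics exactly once, and reuses the same instance for every remaining action instead of throwing, since throwing per action would pay stack-unwinding costs and skew the exception counts. Here is a hedged, self-contained sketch of just that pattern; the names and the size accounting are illustrative, not HBase API.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class OversizeResponseSketch {
    static List<String> process(List<String> actions, long maxResponseSize) {
        List<String> results = new ArrayList<>(actions.size());
        long responseSize = 0;
        IOException sizeIOE = null;  // created lazily, at most once
        for (String action : actions) {
            if (responseSize > maxResponseSize) {
                if (sizeIOE == null) {
                    sizeIOE = new IOException("Max size exceeded: " + responseSize);
                    // metrics.exception(sizeIOE) would be reported once here.
                }
                // Reuse the single exception instance for every remaining action.
                results.add("exception: " + sizeIOE.getMessage());
                continue;  // skip the work for this action
            }
            // Pretend each action contributes its length to the response size.
            responseSize += action.length();
            results.add("ok: " + action);
        }
        return results;
    }

    public static void main(String[] args) {
        // The third and fourth actions exceed the limit and share one exception.
        System.out.println(process(List.of("get-a", "get-bb", "get-ccc", "get-d"), 10));
    }
}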

Example 4 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class BaseLoadBalancer method createCluster.

private BalancerClusterState createCluster(List<ServerName> servers, Collection<RegionInfo> regions) throws HBaseIOException {
    boolean hasRegionReplica = false;
    try {
        if (provider != null) {
            hasRegionReplica = provider.hasRegionReplica(regions);
        }
    } catch (IOException ioe) {
        throw new HBaseIOException(ioe);
    }
    // Get the snapshot of the current assignments for the regions in question, and then create
    // a cluster out of it. Note that we might have replicas already assigned to some servers
    // earlier. So we want to get the snapshot to see those assignments, but this will only contain
    // replicas of the regions that are passed (for performance).
    Map<ServerName, List<RegionInfo>> clusterState = null;
    if (!hasRegionReplica) {
        clusterState = getRegionAssignmentsByServer(regions);
    } else {
        // for the case where we have region replica it is better we get the entire cluster's snapshot
        clusterState = getRegionAssignmentsByServer(null);
    }
    for (ServerName server : servers) {
        if (!clusterState.containsKey(server)) {
            clusterState.put(server, Collections.emptyList());
        }
    }
    return new BalancerClusterState(regions, clusterState, null, this.regionFinder, rackManager);
}
Also used : HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) ServerName(org.apache.hadoop.hbase.ServerName) ArrayList(java.util.ArrayList) List(java.util.List) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException)
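
Two small idioms in createCluster are worth isolating: rewrapping a checked IOException in HBaseIOException so the balancer API surfaces one exception type, and back-filling every known server with an empty region list so idle servers remain balancing targets. The sketch below shows both under stated assumptions; DomainIOException stands in for HBaseIOException, and the assignment-loading step is elided with illustrative data.

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ClusterStateSketch {
    /** Stand-in for HBaseIOException: one domain exception type for callers. */
    static class DomainIOException extends IOException {
        DomainIOException(Throwable cause) { super(cause); }
    }

    interface ReplicaProvider {
        boolean hasRegionReplica(List<String> regions) throws IOException;
    }

    static Map<String, List<String>> buildState(List<String> servers, List<String> regions,
            ReplicaProvider provider) throws DomainIOException {
        boolean hasReplica;
        try {
            hasReplica = provider != null && provider.hasRegionReplica(regions);
        } catch (IOException ioe) {
            // Mirror of "throw new HBaseIOException(ioe)": rewrap the checked
            // exception so the balancer API surfaces a single exception type.
            throw new DomainIOException(ioe);
        }
        Map<String, List<String>> state = new HashMap<>();
        // Loading assignments is elided; with replicas HBase fetches the whole
        // cluster snapshot, otherwise only the passed regions' assignments.
        if (!hasReplica) {
            state.put("rs1", List.of("regionA"));  // illustrative data only
        }
        // Idle servers must still appear in the map so the balancer can target them.
        for (String server : servers) {
            state.putIfAbsent(server, Collections.emptyList());
        }
        return state;
    }

    public static void main(String[] args) throws DomainIOException {
        System.out.println(buildState(List.of("rs1", "rs2"), List.of("regionA"), r -> false));
    }
}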

Example 5 with HBaseIOException

use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.

the class FavoredStochasticBalancer method roundRobinAssignment.

/**
 * Round robin assignment: Segregate the regions into two types:
 *
 * 1. The regions that have favored node assignment where at least one of the favored node
 * is still alive. In this case, try to adhere to the current favored nodes assignment as
 * much as possible - i.e., if the current primary is gone, then make the secondary or
 * tertiary as the new host for the region (based on their current load). Note that we don't
 * change the favored node assignments here (even though one or more favored node is
 * currently down). That will be done by the admin operations.
 *
 * 2. The regions that currently don't have favored node assignments. Generate favored nodes
 * for them and then assign. Generate the primary fn in round robin fashion and generate
 * secondary and tertiary as per favored nodes constraints.
 */
@Override
@NonNull
public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions, List<ServerName> servers) throws HBaseIOException {
    metricsBalancer.incrMiscInvocations();
    Map<ServerName, List<RegionInfo>> assignmentMap = new HashMap<>();
    if (regions.isEmpty()) {
        return assignmentMap;
    }
    Set<RegionInfo> regionSet = new HashSet<>(regions);
    try {
        FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
        helper.initialize();
        Set<RegionInfo> systemRegions = FavoredNodesManager.filterNonFNApplicableRegions(regionSet);
        regionSet.removeAll(systemRegions);
        // Assign all system regions
        Map<ServerName, List<RegionInfo>> systemAssignments = super.roundRobinAssignment(Lists.newArrayList(systemRegions), servers);
        // Segregate favored and non-favored nodes regions and assign accordingly.
        Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>> segregatedRegions = segregateRegionsAndAssignRegionsWithFavoredNodes(regionSet, servers);
        Map<ServerName, List<RegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
        Map<ServerName, List<RegionInfo>> regionsWithoutFN = generateFNForRegionsWithoutFN(helper, segregatedRegions.getSecond());
        // merge the assignment maps
        mergeAssignmentMaps(assignmentMap, systemAssignments);
        mergeAssignmentMaps(assignmentMap, regionsWithFavoredNodesMap);
        mergeAssignmentMaps(assignmentMap, regionsWithoutFN);
    } catch (Exception ex) {
        throw new HBaseIOException("Encountered exception while doing favored-nodes assignment " + ex + " Falling back to regular assignment", ex);
    }
    return assignmentMap;
}
Also used : HashMap(java.util.HashMap) FavoredNodeAssignmentHelper(org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) ServerName(org.apache.hadoop.hbase.ServerName) ArrayList(java.util.ArrayList) List(java.util.List) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) HashMap(java.util.HashMap) Map(java.util.Map) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) HashSet(java.util.HashSet) NonNull(edu.umd.cs.findbugs.annotations.NonNull)
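
roundRobinAssignment builds three partial per-server maps (system regions, regions with live favored nodes, regions with freshly generated favored nodes) and folds them together with mergeAssignmentMaps, whose body is not shown in this excerpt. The following is one plausible implementation of that merge, concatenating region lists for servers that appear in more than one partial map; treat it as a sketch, not the actual HBase method.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class AssignmentMergeSketch {
    static void mergeAssignmentMaps(Map<String, List<String>> target,
            Map<String, List<String>> source) {
        for (Map.Entry<String, List<String>> e : source.entrySet()) {
            // Append the source's regions, creating the server's list on first sight.
            target.computeIfAbsent(e.getKey(), k -> new ArrayList<>()).addAll(e.getValue());
        }
    }

    public static void main(String[] args) {
        Map<String, List<String>> assignment = new HashMap<>();
        mergeAssignmentMaps(assignment, Map.of("rs1", List.of("regionA")));
        mergeAssignmentMaps(assignment, Map.of("rs1", List.of("regionB"), "rs2", List.of("regionC")));
        System.out.println(assignment); // {rs1=[regionA, regionB], rs2=[regionC]}
    }
}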

Aggregations

HBaseIOException (org.apache.hadoop.hbase.HBaseIOException) 36
IOException (java.io.IOException) 19
ServerName (org.apache.hadoop.hbase.ServerName) 17
ArrayList (java.util.ArrayList) 13
RegionInfo (org.apache.hadoop.hbase.client.RegionInfo) 13
List (java.util.List) 8
HashMap (java.util.HashMap) 7
InterruptedIOException (java.io.InterruptedIOException) 5
Map (java.util.Map) 5
DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException) 5
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo) 5
Test (org.junit.Test) 5
TreeMap (java.util.TreeMap) 4
Configuration (org.apache.hadoop.conf.Configuration) 4
NonNull (edu.umd.cs.findbugs.annotations.NonNull) 3
ExecutionException (java.util.concurrent.ExecutionException) 3
RegionLocations (org.apache.hadoop.hbase.RegionLocations) 3
TableName (org.apache.hadoop.hbase.TableName) 3
FavoredNodeAssignmentHelper (org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper) 3
RSGroupAdminEndpoint (org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint) 3