use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.
the class AssignmentManager method onRegionFailedOpen.
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "AT_OPERATION_SEQUENCE_ON_CONCURRENT_ABSTRACTION", justification = "Worth fixing but not the end of the world.")
private String onRegionFailedOpen(final RegionState current, final HRegionInfo hri, final ServerName serverName) {
// it could be a reportRegionTransition RPC retry.
if (current == null || !current.isOpeningOrFailedOpenOnServer(serverName)) {
return hri.getShortNameToLog() + " is not opening on " + serverName;
}
// Just return in case of retrying
if (current.isFailedOpen()) {
return null;
}
String encodedName = hri.getEncodedName();
// FindBugs: AT_OPERATION_SEQUENCE_ON_CONCURRENT_ABSTRACTION Worth fixing!!!
AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName);
if (failedOpenCount == null) {
failedOpenCount = new AtomicInteger();
// No need to use putIfAbsent, or extra synchronization since
// this whole handleRegion block is locked on the encoded region
// name, and failedOpenTracker is updated only in this block
failedOpenTracker.put(encodedName, failedOpenCount);
}
if (failedOpenCount.incrementAndGet() >= maximumAttempts && !hri.isMetaRegion()) {
regionStates.updateRegionState(hri, State.FAILED_OPEN);
// remove the tracking info to save memory, also reset
// the count for next open initiative
failedOpenTracker.remove(encodedName);
} else {
if (hri.isMetaRegion() && failedOpenCount.get() >= maximumAttempts) {
// Log a warning message if a meta region failedOpenCount exceeds maximumAttempts
// so that we are aware of potential problem if it persists for a long time.
LOG.warn("Failed to open the hbase:meta region " + hri.getRegionNameAsString() + " after" + failedOpenCount.get() + " retries. Continue retrying.");
}
// Handle this the same as if it were opened and then closed.
RegionState regionState = regionStates.updateRegionState(hri, State.CLOSED);
if (regionState != null) {
// destination and the same is updated in the region plan. (HBASE-5546)
if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING) || replicasToClose.contains(hri)) {
offlineDisabledRegion(hri);
return null;
}
regionStates.updateRegionState(hri, RegionState.State.CLOSED);
// This below has to do w/ online enable/disable of a table
removeClosedRegion(hri);
try {
getRegionPlan(hri, true);
} catch (HBaseIOException e) {
LOG.warn("Failed to get region plan", e);
}
// Have the current thread sleep a bit before resubmitting the RPC request
long sleepTime = backoffPolicy.getBackoffTime(retryConfig, failedOpenTracker.get(encodedName).get());
invokeAssignLater(hri, sleepTime);
}
}
// Null means no error
return null;
}
use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.
the class FavoredNodeAssignmentHelper method generateFavoredNodes.
/*
* Generate favored nodes for a region.
*
* Choose a random server as primary and then choose secondary and tertiary FN so its spread
* across two racks.
*/
List<ServerName> generateFavoredNodes(HRegionInfo hri) throws IOException {
List<ServerName> favoredNodesForRegion = new ArrayList<>(FAVORED_NODES_NUM);
ServerName primary = servers.get(random.nextInt(servers.size()));
favoredNodesForRegion.add(ServerName.valueOf(primary.getHostAndPort(), ServerName.NON_STARTCODE));
Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<>(1);
primaryRSMap.put(hri, primary);
Map<HRegionInfo, ServerName[]> secondaryAndTertiaryRSMap = placeSecondaryAndTertiaryRS(primaryRSMap);
ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(hri);
if (secondaryAndTertiaryNodes != null && secondaryAndTertiaryNodes.length == 2) {
for (ServerName sn : secondaryAndTertiaryNodes) {
favoredNodesForRegion.add(ServerName.valueOf(sn.getHostAndPort(), ServerName.NON_STARTCODE));
}
return favoredNodesForRegion;
} else {
throw new HBaseIOException("Unable to generate secondary and tertiary favored nodes.");
}
}
use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.
the class RSRpcServices method doNonAtomicRegionMutation.
/**
* Run through the regionMutation <code>rm</code> and per Mutation, do the work, and then when
* done, add an instance of a {@link ResultOrException} that corresponds to each Mutation.
* @param cellsToReturn Could be null. May be allocated in this method. This is what this
* method returns as a 'result'.
* @param closeCallBack the callback to be used with multigets
* @param context the current RpcCallContext
* @return Return the <code>cellScanner</code> passed
*/
private List<CellScannable> doNonAtomicRegionMutation(final HRegion region, final OperationQuota quota, final RegionAction actions, final CellScanner cellScanner, final RegionActionResult.Builder builder, List<CellScannable> cellsToReturn, long nonceGroup, final RegionScannersCloseCallBack closeCallBack, RpcCallContext context, ActivePolicyEnforcement spaceQuotaEnforcement) {
// Gather up CONTIGUOUS Puts and Deletes in this mutations List. Idea is that rather than do
// one at a time, we instead pass them in batch. Be aware that the corresponding
// ResultOrException instance that matches each Put or Delete is then added down in the
// doNonAtomicBatchOp call. We should be staying aligned though the Put and Delete are
// deferred/batched
List<ClientProtos.Action> mutations = null;
long maxQuotaResultSize = Math.min(maxScannerResultSize, quota.getReadAvailable());
IOException sizeIOE = null;
Object lastBlock = null;
ClientProtos.ResultOrException.Builder resultOrExceptionBuilder = ResultOrException.newBuilder();
boolean hasResultOrException = false;
for (ClientProtos.Action action : actions.getActionList()) {
hasResultOrException = false;
resultOrExceptionBuilder.clear();
try {
Result r = null;
if (context != null && context.isRetryImmediatelySupported() && (context.getResponseCellSize() > maxQuotaResultSize || context.getResponseBlockSize() + context.getResponseExceptionSize() > maxQuotaResultSize)) {
// change after the response size limit is reached.
if (sizeIOE == null) {
// We don't need the stack un-winding do don't throw the exception.
// Throwing will kill the JVM's JIT.
//
// Instead just create the exception and then store it.
sizeIOE = new MultiActionResultTooLarge("Max size exceeded" + " CellSize: " + context.getResponseCellSize() + " BlockSize: " + context.getResponseBlockSize());
// Only report the exception once since there's only one request that
// caused the exception. Otherwise this number will dominate the exceptions count.
rpcServer.getMetrics().exception(sizeIOE);
}
// Now that there's an exception is known to be created
// use it for the response.
//
// This will create a copy in the builder.
NameBytesPair pair = ResponseConverter.buildException(sizeIOE);
resultOrExceptionBuilder.setException(pair);
context.incrementResponseExceptionSize(pair.getSerializedSize());
resultOrExceptionBuilder.setIndex(action.getIndex());
builder.addResultOrException(resultOrExceptionBuilder.build());
skipCellsForMutation(action, cellScanner);
continue;
}
if (action.hasGet()) {
long before = EnvironmentEdgeManager.currentTime();
ClientProtos.Get pbGet = action.getGet();
// they are; its a problem for non-native clients like asynchbase. HBASE-20225.
if (pbGet.hasClosestRowBefore() && pbGet.getClosestRowBefore()) {
throw new UnknownProtocolException("Is this a pre-hbase-1.0.0 or asynchbase client? " + "Client is invoking getClosestRowBefore removed in hbase-2.0.0 replaced by " + "reverse Scan.");
}
try {
Get get = ProtobufUtil.toGet(pbGet);
if (context != null) {
r = get(get, (region), closeCallBack, context);
} else {
r = region.get(get);
}
} finally {
final MetricsRegionServer metricsRegionServer = server.getMetrics();
if (metricsRegionServer != null) {
metricsRegionServer.updateGet(region.getTableDescriptor().getTableName(), EnvironmentEdgeManager.currentTime() - before);
}
}
} else if (action.hasServiceCall()) {
hasResultOrException = true;
Message result = execServiceOnRegion(region, action.getServiceCall());
ClientProtos.CoprocessorServiceResult.Builder serviceResultBuilder = ClientProtos.CoprocessorServiceResult.newBuilder();
resultOrExceptionBuilder.setServiceResult(serviceResultBuilder.setValue(serviceResultBuilder.getValueBuilder().setName(result.getClass().getName()).setValue(UnsafeByteOperations.unsafeWrap(result.toByteArray()))));
} else if (action.hasMutation()) {
MutationType type = action.getMutation().getMutateType();
if (type != MutationType.PUT && type != MutationType.DELETE && mutations != null && !mutations.isEmpty()) {
// Flush out any Puts or Deletes already collected.
doNonAtomicBatchOp(builder, region, quota, mutations, cellScanner, spaceQuotaEnforcement);
mutations.clear();
}
switch(type) {
case APPEND:
r = append(region, quota, action.getMutation(), cellScanner, nonceGroup, spaceQuotaEnforcement);
break;
case INCREMENT:
r = increment(region, quota, action.getMutation(), cellScanner, nonceGroup, spaceQuotaEnforcement);
break;
case PUT:
case DELETE:
// Collect the individual mutations and apply in a batch
if (mutations == null) {
mutations = new ArrayList<>(actions.getActionCount());
}
mutations.add(action);
break;
default:
throw new DoNotRetryIOException("Unsupported mutate type: " + type.name());
}
} else {
throw new HBaseIOException("Unexpected Action type");
}
if (r != null) {
ClientProtos.Result pbResult = null;
if (isClientCellBlockSupport(context)) {
pbResult = ProtobufUtil.toResultNoData(r);
// Hard to guess the size here. Just make a rough guess.
if (cellsToReturn == null) {
cellsToReturn = new ArrayList<>();
}
cellsToReturn.add(r);
} else {
pbResult = ProtobufUtil.toResult(r);
}
lastBlock = addSize(context, r, lastBlock);
hasResultOrException = true;
resultOrExceptionBuilder.setResult(pbResult);
}
// Could get to here and there was no result and no exception. Presumes we added
// a Put or Delete to the collecting Mutations List for adding later. In this
// case the corresponding ResultOrException instance for the Put or Delete will be added
// down in the doNonAtomicBatchOp method call rather than up here.
} catch (IOException ie) {
rpcServer.getMetrics().exception(ie);
hasResultOrException = true;
NameBytesPair pair = ResponseConverter.buildException(ie);
resultOrExceptionBuilder.setException(pair);
context.incrementResponseExceptionSize(pair.getSerializedSize());
}
if (hasResultOrException) {
// Propagate index.
resultOrExceptionBuilder.setIndex(action.getIndex());
builder.addResultOrException(resultOrExceptionBuilder.build());
}
}
// Finish up any outstanding mutations
if (!CollectionUtils.isEmpty(mutations)) {
doNonAtomicBatchOp(builder, region, quota, mutations, cellScanner, spaceQuotaEnforcement);
}
return cellsToReturn;
}
use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.
the class BaseLoadBalancer method createCluster.
private BalancerClusterState createCluster(List<ServerName> servers, Collection<RegionInfo> regions) throws HBaseIOException {
boolean hasRegionReplica = false;
try {
if (provider != null) {
hasRegionReplica = provider.hasRegionReplica(regions);
}
} catch (IOException ioe) {
throw new HBaseIOException(ioe);
}
// Get the snapshot of the current assignments for the regions in question, and then create
// a cluster out of it. Note that we might have replicas already assigned to some servers
// earlier. So we want to get the snapshot to see those assignments, but this will only contain
// replicas of the regions that are passed (for performance).
Map<ServerName, List<RegionInfo>> clusterState = null;
if (!hasRegionReplica) {
clusterState = getRegionAssignmentsByServer(regions);
} else {
// for the case where we have region replica it is better we get the entire cluster's snapshot
clusterState = getRegionAssignmentsByServer(null);
}
for (ServerName server : servers) {
if (!clusterState.containsKey(server)) {
clusterState.put(server, Collections.emptyList());
}
}
return new BalancerClusterState(regions, clusterState, null, this.regionFinder, rackManager);
}
use of org.apache.hadoop.hbase.HBaseIOException in project hbase by apache.
the class FavoredStochasticBalancer method roundRobinAssignment.
/**
* Round robin assignment: Segregate the regions into two types:
*
* 1. The regions that have favored node assignment where at least one of the favored node
* is still alive. In this case, try to adhere to the current favored nodes assignment as
* much as possible - i.e., if the current primary is gone, then make the secondary or
* tertiary as the new host for the region (based on their current load). Note that we don't
* change the favored node assignments here (even though one or more favored node is
* currently down). That will be done by the admin operations.
*
* 2. The regions that currently don't have favored node assignments. Generate favored nodes
* for them and then assign. Generate the primary fn in round robin fashion and generate
* secondary and tertiary as per favored nodes constraints.
*/
@Override
@NonNull
public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions, List<ServerName> servers) throws HBaseIOException {
metricsBalancer.incrMiscInvocations();
Map<ServerName, List<RegionInfo>> assignmentMap = new HashMap<>();
if (regions.isEmpty()) {
return assignmentMap;
}
Set<RegionInfo> regionSet = new HashSet<>(regions);
try {
FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
helper.initialize();
Set<RegionInfo> systemRegions = FavoredNodesManager.filterNonFNApplicableRegions(regionSet);
regionSet.removeAll(systemRegions);
// Assign all system regions
Map<ServerName, List<RegionInfo>> systemAssignments = super.roundRobinAssignment(Lists.newArrayList(systemRegions), servers);
// Segregate favored and non-favored nodes regions and assign accordingly.
Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>> segregatedRegions = segregateRegionsAndAssignRegionsWithFavoredNodes(regionSet, servers);
Map<ServerName, List<RegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
Map<ServerName, List<RegionInfo>> regionsWithoutFN = generateFNForRegionsWithoutFN(helper, segregatedRegions.getSecond());
// merge the assignment maps
mergeAssignmentMaps(assignmentMap, systemAssignments);
mergeAssignmentMaps(assignmentMap, regionsWithFavoredNodesMap);
mergeAssignmentMaps(assignmentMap, regionsWithoutFN);
} catch (Exception ex) {
throw new HBaseIOException("Encountered exception while doing favored-nodes assignment " + ex + " Falling back to regular assignment", ex);
}
return assignmentMap;
}
Aggregations