Use of org.apache.hadoop.hbase.CallQueueTooBigException in project hbase by apache.
The class ConnectionImplementation, method locateRegionInMeta.
/*
* Search the hbase:meta table for the HRegionLocation
* info that contains the table and row we're seeking.
*/
private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, boolean useCache, boolean retry, int replicaId) throws IOException {
  // If the cache is enabled, check whether we already have the region location.
  if (useCache) {
    RegionLocations locations = getCachedLocation(tableName, row);
    if (locations != null && locations.getRegionLocation(replicaId) != null) {
      return locations;
    }
  }
  // build the key of the meta region we should be looking for.
  // the extra 9's on the end are necessary to allow "exact" matches
  // without knowing the precise region names.
  byte[] metaKey = HRegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
  Scan s = new Scan();
  s.setReversed(true);
  s.withStartRow(metaKey);
  s.addFamily(HConstants.CATALOG_FAMILY);
  s.setOneRowLimit();
  if (this.useMetaReplicas) {
    s.setConsistency(Consistency.TIMELINE);
  }
  int maxAttempts = (retry ? numTries : 1);
  for (int tries = 0; true; tries++) {
    if (tries >= maxAttempts) {
      throw new NoServerForRegionException("Unable to find region for "
          + Bytes.toStringBinary(row) + " in " + tableName + " after " + tries + " tries.");
    }
    if (useCache) {
      RegionLocations locations = getCachedLocation(tableName, row);
      if (locations != null && locations.getRegionLocation(replicaId) != null) {
        return locations;
      }
    } else {
      // If we are not supposed to be using the cache, delete any existing cached location
      // so it won't interfere.
      metaCache.clearCache(tableName, row);
    }
    // Query the meta region
    long pauseBase = this.pause;
    try {
      Result regionInfoRow = null;
      s.resetMvccReadPoint();
      try (ReversedClientScanner rcs = new ReversedClientScanner(conf, s, TableName.META_TABLE_NAME,
          this, rpcCallerFactory, rpcControllerFactory, getMetaLookupPool(), 0)) {
        regionInfoRow = rcs.next();
      }
      if (regionInfoRow == null) {
        throw new TableNotFoundException(tableName);
      }
      // convert the row result into the HRegionLocation we need!
      RegionLocations locations = MetaTableAccessor.getRegionLocations(regionInfoRow);
      if (locations == null || locations.getRegionLocation(replicaId) == null) {
        throw new IOException("HRegionInfo was null in " + tableName + ", row=" + regionInfoRow);
      }
      HRegionInfo regionInfo = locations.getRegionLocation(replicaId).getRegionInfo();
      if (regionInfo == null) {
        throw new IOException("HRegionInfo was null or empty in " + TableName.META_TABLE_NAME
            + ", row=" + regionInfoRow);
      }
      // possible we got a region of a different table...
      if (!regionInfo.getTable().equals(tableName)) {
        throw new TableNotFoundException("Table '" + tableName + "' was not found, got: "
            + regionInfo.getTable() + ".");
      }
      if (regionInfo.isSplit()) {
        throw new RegionOfflineException("the only available region for the required row is a"
            + " split parent, the daughters should be online soon: "
            + regionInfo.getRegionNameAsString());
      }
      if (regionInfo.isOffline()) {
        throw new RegionOfflineException("the region is offline, could be caused by a disable"
            + " table call: " + regionInfo.getRegionNameAsString());
      }
      ServerName serverName = locations.getRegionLocation(replicaId).getServerName();
      if (serverName == null) {
        throw new NoServerForRegionException("No server address listed in "
            + TableName.META_TABLE_NAME + " for region " + regionInfo.getRegionNameAsString()
            + " containing row " + Bytes.toStringBinary(row));
      }
      if (isDeadServer(serverName)) {
        throw new RegionServerStoppedException("hbase:meta says the region "
            + regionInfo.getRegionNameAsString() + " is managed by the server " + serverName
            + ", but it is dead.");
      }
      // Cache the location and return it
      cacheLocation(tableName, locations);
      return locations;
    } catch (TableNotFoundException e) {
      // The table does not exist; rethrow so the caller (e.g. the HTable constructor) can handle it.
      throw e;
    } catch (IOException e) {
      ExceptionUtil.rethrowIfInterrupt(e);
      if (e instanceof RemoteException) {
        e = ((RemoteException) e).unwrapRemoteException();
      }
      if (e instanceof CallQueueTooBigException) {
        // Give a special check on CallQueueTooBigException, see #HBASE-17114
        pauseBase = this.pauseForCQTBE;
      }
      if (tries < maxAttempts - 1) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("locateRegionInMeta parentTable=" + TableName.META_TABLE_NAME
              + ", attempt=" + tries + " of " + maxAttempts + " failed; retrying after sleep of "
              + ConnectionUtils.getPauseTime(pauseBase, tries) + " because: " + e.getMessage());
        }
      } else {
        throw e;
      }
      // Only relocate the parent region if necessary
      if (!(e instanceof RegionOfflineException || e instanceof NoServerForRegionException)) {
        relocateRegion(TableName.META_TABLE_NAME, metaKey, replicaId);
      }
    }
    try {
      Thread.sleep(ConnectionUtils.getPauseTime(pauseBase, tries));
    } catch (InterruptedException e) {
      throw new InterruptedIOException("Giving up trying to locate region in meta: "
          + "thread is interrupted.");
    }
  }
}
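Both pause values used above come from client configuration: this.pause is backed by HConstants.HBASE_CLIENT_PAUSE and this.pauseForCQTBE by HConstants.HBASE_CLIENT_PAUSE_FOR_CQTBE (the setting added for HBASE-17114). The following is a minimal, hedged configuration sketch; the class name and the numeric values are illustrative, not defaults taken from this code.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;

public class ClientPauseConfigSketch {
  public static Configuration create() {
    Configuration conf = HBaseConfiguration.create();
    // Base pause used for ordinary retryable failures (illustrative value).
    conf.setLong(HConstants.HBASE_CLIENT_PAUSE, 100L);
    // Larger base pause used when the server rejects the call with
    // CallQueueTooBigException (illustrative value, see HBASE-17114).
    conf.setLong(HConstants.HBASE_CLIENT_PAUSE_FOR_CQTBE, 500L);
    return conf;
  }
}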
Use of org.apache.hadoop.hbase.CallQueueTooBigException in project hbase by apache.
The class TestAsyncProcess, method testRetryPauseWithCallQueueTooBigException.
/**
* Test and make sure we could use a special pause setting when retry with
* CallQueueTooBigException, see HBASE-17114
* @throws Exception if unexpected error happened during test
*/
@Test
public void testRetryPauseWithCallQueueTooBigException() throws Exception {
  Configuration myConf = new Configuration(CONF);
  final long specialPause = 500L;
  final int retries = 1;
  myConf.setLong(HConstants.HBASE_CLIENT_PAUSE_FOR_CQTBE, specialPause);
  myConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, retries);
  ClusterConnection conn = new MyConnectionImpl(myConf);
  AsyncProcessWithFailure ap = new AsyncProcessWithFailure(conn, myConf, new CallQueueTooBigException());
  BufferedMutatorParams bufferParam = createBufferedMutatorParams(ap, DUMMY_TABLE);
  BufferedMutatorImpl mutator = new BufferedMutatorImpl(conn, bufferParam, ap);
  Assert.assertNotNull(mutator.getAsyncProcess().createServerErrorTracker());
  Put p = createPut(1, true);
  mutator.mutate(p);
  long startTime = System.currentTimeMillis();
  try {
    mutator.flush();
    Assert.fail();
  } catch (RetriesExhaustedWithDetailsException expected) {
  }
  long actualSleep = System.currentTimeMillis() - startTime;
  long expectedSleep = 0L;
  for (int i = 0; i < retries; i++) {
    expectedSleep += ConnectionUtils.getPauseTime(specialPause, i);
    // Prevent the jitter in ConnectionUtils#getPauseTime from affecting the result
    actualSleep += (long) (specialPause * 0.01f);
  }
  LOG.debug("Expected to sleep " + expectedSleep + "ms, actually slept " + actualSleep + "ms");
  Assert.assertTrue("Expected to sleep " + expectedSleep + " but actually " + actualSleep + "ms",
      actualSleep >= expectedSleep);
  // check and confirm normal IOE will use the normal pause
  final long normalPause = myConf.getLong(HConstants.HBASE_CLIENT_PAUSE, HConstants.DEFAULT_HBASE_CLIENT_PAUSE);
  ap = new AsyncProcessWithFailure(conn, myConf, new IOException());
  bufferParam = createBufferedMutatorParams(ap, DUMMY_TABLE);
  mutator = new BufferedMutatorImpl(conn, bufferParam, ap);
  Assert.assertNotNull(mutator.getAsyncProcess().createServerErrorTracker());
  mutator.mutate(p);
  startTime = System.currentTimeMillis();
  try {
    mutator.flush();
    Assert.fail();
  } catch (RetriesExhaustedWithDetailsException expected) {
  }
  actualSleep = System.currentTimeMillis() - startTime;
  expectedSleep = 0L;
  for (int i = 0; i < retries; i++) {
    expectedSleep += ConnectionUtils.getPauseTime(normalPause, i);
  }
  // plus an additional pause to balance the program execution time
  expectedSleep += normalPause;
  LOG.debug("Expected to sleep " + expectedSleep + "ms, actually slept " + actualSleep + "ms");
  Assert.assertTrue("Slept for too long: " + actualSleep + "ms", actualSleep <= expectedSleep);
}
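For context on the arithmetic in this test: ConnectionUtils.getPauseTime scales the base pause by the retry back-off multiplier (HConstants.RETRY_BACKOFF) for the given try and adds a small amount of jitter, which is why the test pads actualSleep by specialPause * 0.01f per retry. The sketch below is illustrative only; the class name, retry count, and printout are not part of the test.
import org.apache.hadoop.hbase.client.ConnectionUtils;

public class BackoffScheduleSketch {
  public static void main(String[] args) {
    // 500 ms matches the specialPause used for CallQueueTooBigException in the test above.
    long basePause = 500L;
    long total = 0L;
    for (int tries = 0; tries < 3; tries++) {
      long sleep = ConnectionUtils.getPauseTime(basePause, tries);
      total += sleep;
      System.out.println("retry " + tries + ": ~" + sleep + " ms (cumulative ~" + total + " ms)");
    }
  }
}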
Use of org.apache.hadoop.hbase.CallQueueTooBigException in project hbase by apache.
The class TestAsyncProcess, method testCallQueueTooLarge.
@Test
public void testCallQueueTooLarge() throws IOException {
  ClusterConnection conn = new MyConnectionImpl(CONF);
  AsyncProcessWithFailure ap = new AsyncProcessWithFailure(conn, CONF, new CallQueueTooBigException());
  BufferedMutatorParams bufferParam = createBufferedMutatorParams(ap, DUMMY_TABLE);
  BufferedMutatorImpl mutator = new BufferedMutatorImpl(conn, bufferParam, ap);
  Assert.assertNotNull(ap.createServerErrorTracker());
  Put p = createPut(1, true);
  mutator.mutate(p);
  try {
    mutator.flush();
    Assert.fail();
  } catch (RetriesExhaustedWithDetailsException expected) {
  }
  // Checking that the ErrorsServers came into play and didn't make us stop immediately
  Assert.assertEquals(NB_RETRIES + 1, ap.callsCt.get());
}
Use of org.apache.hadoop.hbase.CallQueueTooBigException in project hbase by apache.
The class RpcRetryingCallerImpl, method callWithRetries.
@Override
public T callWithRetries(RetryingCallable<T> callable, int callTimeout) throws IOException, RuntimeException {
  List<RetriesExhaustedException.ThrowableWithExtraContext> exceptions = new ArrayList<>();
  tracker.start();
  context.clear();
  for (int tries = 0; ; tries++) {
    long expectedSleep;
    try {
      // bad cache entries are cleared in the call to RetryingCallable#throwable() in catch block
      callable.prepare(tries != 0);
      interceptor.intercept(context.prepare(callable, tries));
      return callable.call(getTimeout(callTimeout));
    } catch (PreemptiveFastFailException e) {
      throw e;
    } catch (Throwable t) {
      Throwable e = t.getCause();
      ExceptionUtil.rethrowIfInterrupt(t);
      // translateException throws exception when should not retry: i.e. when request is bad.
      interceptor.handleFailure(context, t);
      t = translateException(t);
      if (tries > startLogErrorsCnt) {
        LOG.info("Call exception, tries=" + tries + ", maxAttempts=" + maxAttempts + ", started="
            + (EnvironmentEdgeManager.currentTime() - tracker.getStartTime()) + " ms ago, "
            + "cancelled=" + cancelled.get() + ", msg=" + t.getMessage() + " "
            + callable.getExceptionMessageAdditionalDetail());
      }
      callable.throwable(t, maxAttempts != 1);
      RetriesExhaustedException.ThrowableWithExtraContext qt =
          new RetriesExhaustedException.ThrowableWithExtraContext(t,
              EnvironmentEdgeManager.currentTime(), toString());
      exceptions.add(qt);
      if (tries >= maxAttempts - 1) {
        throw new RetriesExhaustedException(tries, exceptions);
      }
      // If the server is dead, we need to wait a little before retrying, to give
      // a chance to the regions to be moved
      // get right pause time, start by RETRY_BACKOFF[0] * pauseBase, where pauseBase might be
      // special when encountering CallQueueTooBigException, see #HBASE-17114
      long pauseBase = (t instanceof CallQueueTooBigException) ? pauseForCQTBE : pause;
      expectedSleep = callable.sleep(pauseBase, tries);
      // If, after the planned sleep, there won't be enough time left, we stop now.
      long duration = singleCallDuration(expectedSleep);
      if (duration > callTimeout) {
        String msg = "callTimeout=" + callTimeout + ", callDuration=" + duration + ": "
            + t.getMessage() + " " + callable.getExceptionMessageAdditionalDetail();
        throw (SocketTimeoutException) (new SocketTimeoutException(msg).initCause(t));
      }
    } finally {
      interceptor.updateFailureInfo(context);
    }
    try {
      if (expectedSleep > 0) {
        synchronized (cancelled) {
          if (cancelled.get()) return null;
          cancelled.wait(expectedSleep);
        }
      }
      if (cancelled.get()) return null;
    } catch (InterruptedException e) {
      throw new InterruptedIOException("Interrupted after " + tries + " tries while maxAttempts="
          + maxAttempts);
    }
  }
}
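For readers who want to drive this retry loop directly: a caller typically obtains an RpcRetryingCaller from a factory and passes in a RetryingCallable whose methods (prepare, call, throwable, sleep, getExceptionMessageAdditionalDetail) are exactly the ones invoked above. The sketch below is hedged: it assumes the RpcRetryingCallerFactory.instantiate(conf) and newCaller() entry points are available in this version and uses a trivial callable purely for illustration.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.ConnectionUtils;
import org.apache.hadoop.hbase.client.RetryingCallable;
import org.apache.hadoop.hbase.client.RpcRetryingCaller;
import org.apache.hadoop.hbase.client.RpcRetryingCallerFactory;

public class RetryingCallerSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // Assumed entry point: the factory reads pause, pauseForCQTBE and maxAttempts from conf.
    RpcRetryingCaller<String> caller = RpcRetryingCallerFactory.instantiate(conf).<String>newCaller();
    String result = caller.callWithRetries(new RetryingCallable<String>() {
      @Override
      public void prepare(boolean reload) {
        // Locate or re-locate the target region/server here; reload is true on retries.
      }
      @Override
      public String call(int callTimeout) throws Exception {
        // The actual RPC; throwing CallQueueTooBigException here would make the retry
        // loop above switch to the larger pauseForCQTBE base pause.
        return "ok";
      }
      @Override
      public void throwable(Throwable t, boolean retrying) {
        // Clear bad cache entries or update state based on the failure.
      }
      @Override
      public long sleep(long pause, int tries) {
        // Same back-off helper the HBase client uses elsewhere.
        return ConnectionUtils.getPauseTime(pause, tries);
      }
      @Override
      public String getExceptionMessageAdditionalDetail() {
        return "sketch callable";
      }
    }, 60000);
    System.out.println(result);
  }
}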
Use of org.apache.hadoop.hbase.CallQueueTooBigException in project hbase by apache.
The class HRegionServer, method reportRegionStateTransition.
@Override
public boolean reportRegionStateTransition(final RegionStateTransitionContext context) {
  if (TEST_SKIP_REPORTING_TRANSITION) {
    return skipReportingTransition(context);
  }
  final ReportRegionStateTransitionRequest request = createReportRegionStateTransitionRequest(context);
  int tries = 0;
  long pauseTime = this.retryPauseTime;
  // Keep trying as long as the cluster connection is open; it is closed only as the
  // HRegionServer goes down.
  while (this.asyncClusterConnection != null && !this.asyncClusterConnection.isClosed()) {
    RegionServerStatusService.BlockingInterface rss = rssStub;
    try {
      if (rss == null) {
        createRegionServerStatusStub();
        continue;
      }
      ReportRegionStateTransitionResponse response = rss.reportRegionStateTransition(null, request);
      if (response.hasErrorMessage()) {
        LOG.info("TRANSITION FAILED " + request + ": " + response.getErrorMessage());
        break;
      }
      // Log if we had to retry; we want to know if we were successful after an attempt
      // showed in the logs as failed.
      if (tries > 0 || LOG.isTraceEnabled()) {
        LOG.info("TRANSITION REPORTED " + request);
      }
      // NOTE: Return mid-method!!!
      return true;
    } catch (ServiceException se) {
      IOException ioe = ProtobufUtil.getRemoteException(se);
      boolean pause = ioe instanceof ServerNotRunningYetException || ioe instanceof PleaseHoldException
          || ioe instanceof CallQueueTooBigException;
      if (pause) {
        // Do backoff else we flood the Master with requests.
        pauseTime = ConnectionUtils.getPauseTime(this.retryPauseTime, tries);
      } else {
        // Reset.
        pauseTime = this.retryPauseTime;
      }
      LOG.info("Failed report transition " + TextFormat.shortDebugString(request) + "; retry (#"
          + tries + ")" + (pause
              ? " after " + pauseTime + "ms delay (Master is coming online...)."
              : " immediately."), ioe);
      if (pause) {
        Threads.sleep(pauseTime);
      }
      tries++;
      if (rssStub == rss) {
        rssStub = null;
      }
    }
  }
  return false;
}