Search in sources:

Example 1 with CallQueueTooBigException

Use of org.apache.hadoop.hbase.CallQueueTooBigException in project hbase by apache.

The class ConnectionImplementation, method locateRegionInMeta.

/*
 * Search the hbase:meta table for the HRegionLocation
 * info that contains the table and row we're seeking.
 */
private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, boolean useCache, boolean retry, int replicaId) throws IOException {
    // Check the cache first; if it has the location, we already have the region.
    if (useCache) {
        RegionLocations locations = getCachedLocation(tableName, row);
        if (locations != null && locations.getRegionLocation(replicaId) != null) {
            return locations;
        }
    }
    // build the key of the meta region we should be looking for.
    // the extra 9's on the end are necessary to allow "exact" matches
    // without knowing the precise region names.
    byte[] metaKey = HRegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
    Scan s = new Scan();
    s.setReversed(true);
    s.withStartRow(metaKey);
    s.addFamily(HConstants.CATALOG_FAMILY);
    s.setOneRowLimit();
    if (this.useMetaReplicas) {
        s.setConsistency(Consistency.TIMELINE);
    }
    int maxAttempts = (retry ? numTries : 1);
    for (int tries = 0; true; tries++) {
        if (tries >= maxAttempts) {
            throw new NoServerForRegionException("Unable to find region for " + Bytes.toStringBinary(row) + " in " + tableName + " after " + tries + " tries.");
        }
        if (useCache) {
            RegionLocations locations = getCachedLocation(tableName, row);
            if (locations != null && locations.getRegionLocation(replicaId) != null) {
                return locations;
            }
        } else {
            // If we are not supposed to be using the cache, delete any existing cached location
            // so it won't interfere.
            metaCache.clearCache(tableName, row);
        }
        // Query the meta region
        long pauseBase = this.pause;
        try {
            Result regionInfoRow = null;
            s.resetMvccReadPoint();
            try (ReversedClientScanner rcs = new ReversedClientScanner(conf, s, TableName.META_TABLE_NAME, this, rpcCallerFactory, rpcControllerFactory, getMetaLookupPool(), 0)) {
                regionInfoRow = rcs.next();
            }
            if (regionInfoRow == null) {
                throw new TableNotFoundException(tableName);
            }
            // convert the row result into the HRegionLocation we need!
            RegionLocations locations = MetaTableAccessor.getRegionLocations(regionInfoRow);
            if (locations == null || locations.getRegionLocation(replicaId) == null) {
                throw new IOException("HRegionInfo was null in " + tableName + ", row=" + regionInfoRow);
            }
            HRegionInfo regionInfo = locations.getRegionLocation(replicaId).getRegionInfo();
            if (regionInfo == null) {
                throw new IOException("HRegionInfo was null or empty in " + TableName.META_TABLE_NAME + ", row=" + regionInfoRow);
            }
            // possible we got a region of a different table...
            if (!regionInfo.getTable().equals(tableName)) {
                throw new TableNotFoundException("Table '" + tableName + "' was not found, got: " + regionInfo.getTable() + ".");
            }
            if (regionInfo.isSplit()) {
                throw new RegionOfflineException("the only available region for" + " the required row is a split parent," + " the daughters should be online soon: " + regionInfo.getRegionNameAsString());
            }
            if (regionInfo.isOffline()) {
                throw new RegionOfflineException("the region is offline, could" + " be caused by a disable table call: " + regionInfo.getRegionNameAsString());
            }
            ServerName serverName = locations.getRegionLocation(replicaId).getServerName();
            if (serverName == null) {
                throw new NoServerForRegionException("No server address listed " + "in " + TableName.META_TABLE_NAME + " for region " + regionInfo.getRegionNameAsString() + " containing row " + Bytes.toStringBinary(row));
            }
            if (isDeadServer(serverName)) {
                throw new RegionServerStoppedException("hbase:meta says the region " + regionInfo.getRegionNameAsString() + " is managed by the server " + serverName + ", but it is dead.");
            }
            // Instantiate the location
            cacheLocation(tableName, locations);
            return locations;
        } catch (TableNotFoundException e) {
            // The table just doesn't exist; rethrow immediately.
            // This typically surfaces from the HTable constructor.
            throw e;
        } catch (IOException e) {
            ExceptionUtil.rethrowIfInterrupt(e);
            if (e instanceof RemoteException) {
                e = ((RemoteException) e).unwrapRemoteException();
            }
            if (e instanceof CallQueueTooBigException) {
                // Use the special (longer) pause for CallQueueTooBigException, see HBASE-17114
                pauseBase = this.pauseForCQTBE;
            }
            if (tries < maxAttempts - 1) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("locateRegionInMeta parentTable=" + TableName.META_TABLE_NAME + ", metaLocation=" + ", attempt=" + tries + " of " + maxAttempts + " failed; retrying after sleep of " + ConnectionUtils.getPauseTime(pauseBase, tries) + " because: " + e.getMessage());
                }
            } else {
                throw e;
            }
            // Only relocate the parent region if necessary
            if (!(e instanceof RegionOfflineException || e instanceof NoServerForRegionException)) {
                relocateRegion(TableName.META_TABLE_NAME, metaKey, replicaId);
            }
        }
        try {
            Thread.sleep(ConnectionUtils.getPauseTime(pauseBase, tries));
        } catch (InterruptedException e) {
            throw new InterruptedIOException("Giving up trying to location region in " + "meta: thread is interrupted.");
        }
    }
}
Also used: RegionLocations(org.apache.hadoop.hbase.RegionLocations) InterruptedIOException(java.io.InterruptedIOException) CallQueueTooBigException(org.apache.hadoop.hbase.CallQueueTooBigException) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) TableNotFoundException(org.apache.hadoop.hbase.TableNotFoundException) RegionServerStoppedException(org.apache.hadoop.hbase.regionserver.RegionServerStoppedException) ServerName(org.apache.hadoop.hbase.ServerName) RemoteException(org.apache.hadoop.ipc.RemoteException)
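
The longer pause applied above (pauseForCQTBE) is driven by client configuration. Below is a minimal sketch of setting it up, using the HBASE-17114 configuration keys that also appear in Example 2; the class name and millisecond values are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CqtbePauseConfigSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Normal base pause between retries, in milliseconds.
        conf.setLong(HConstants.HBASE_CLIENT_PAUSE, 100L);
        // Larger base pause used when a server rejects a call with
        // CallQueueTooBigException, giving its call queue time to drain.
        conf.setLong(HConstants.HBASE_CLIENT_PAUSE_FOR_CQTBE, 1000L);
        try (Connection connection = ConnectionFactory.createConnection(conf)) {
            // Meta lookups and other retried RPCs on this connection now back
            // off with the larger pause after a CallQueueTooBigException.
        }
    }
}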

Example 2 with CallQueueTooBigException

Use of org.apache.hadoop.hbase.CallQueueTooBigException in project hbase by apache.

The class TestAsyncProcess, method testRetryPauseWithCallQueueTooBigException.

/**
   * Test and make sure we use a special pause setting when retrying on
   * CallQueueTooBigException, see HBASE-17114
   * @throws Exception if an unexpected error happens during the test
   */
@Test
public void testRetryPauseWithCallQueueTooBigException() throws Exception {
    Configuration myConf = new Configuration(CONF);
    final long specialPause = 500L;
    final int retries = 1;
    myConf.setLong(HConstants.HBASE_CLIENT_PAUSE_FOR_CQTBE, specialPause);
    myConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, retries);
    ClusterConnection conn = new MyConnectionImpl(myConf);
    AsyncProcessWithFailure ap = new AsyncProcessWithFailure(conn, myConf, new CallQueueTooBigException());
    BufferedMutatorParams bufferParam = createBufferedMutatorParams(ap, DUMMY_TABLE);
    BufferedMutatorImpl mutator = new BufferedMutatorImpl(conn, bufferParam, ap);
    Assert.assertNotNull(mutator.getAsyncProcess().createServerErrorTracker());
    Put p = createPut(1, true);
    mutator.mutate(p);
    long startTime = System.currentTimeMillis();
    try {
        mutator.flush();
        Assert.fail();
    } catch (RetriesExhaustedWithDetailsException expected) {
    }
    long actualSleep = System.currentTimeMillis() - startTime;
    long expectedSleep = 0L;
    for (int i = 0; i < retries; i++) {
        expectedSleep += ConnectionUtils.getPauseTime(specialPause, i);
        // Add a small buffer so the jitter in ConnectionUtils#getPauseTime cannot affect the result
        actualSleep += (long) (specialPause * 0.01f);
    }
    LOG.debug("Expected to sleep " + expectedSleep + "ms, actually slept " + actualSleep + "ms");
    Assert.assertTrue("Expected to sleep " + expectedSleep + " but actually " + actualSleep + "ms", actualSleep >= expectedSleep);
    // check and confirm normal IOE will use the normal pause
    final long normalPause = myConf.getLong(HConstants.HBASE_CLIENT_PAUSE, HConstants.DEFAULT_HBASE_CLIENT_PAUSE);
    ap = new AsyncProcessWithFailure(conn, myConf, new IOException());
    bufferParam = createBufferedMutatorParams(ap, DUMMY_TABLE);
    mutator = new BufferedMutatorImpl(conn, bufferParam, ap);
    Assert.assertNotNull(mutator.getAsyncProcess().createServerErrorTracker());
    mutator.mutate(p);
    startTime = System.currentTimeMillis();
    try {
        mutator.flush();
        Assert.fail();
    } catch (RetriesExhaustedWithDetailsException expected) {
    }
    actualSleep = System.currentTimeMillis() - startTime;
    expectedSleep = 0L;
    for (int i = 0; i < retries; i++) {
        expectedSleep += ConnectionUtils.getPauseTime(normalPause, i);
    }
    // plus one additional pause to allow for test execution overhead
    expectedSleep += normalPause;
    LOG.debug("Expected to sleep " + expectedSleep + "ms, actually slept " + actualSleep + "ms");
    Assert.assertTrue("Slept for too long: " + actualSleep + "ms", actualSleep <= expectedSleep);
}
Also used: Configuration(org.apache.hadoop.conf.Configuration) CallQueueTooBigException(org.apache.hadoop.hbase.CallQueueTooBigException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) Test(org.junit.Test)
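
The expectedSleep arithmetic in this test leans on ConnectionUtils.getPauseTime, which scales the base pause by a per-attempt backoff table and adds up to 1% random jitter. A standalone sketch of that formula follows; the table mirrors HConstants.RETRY_BACKOFF, but treat the reimplementation as illustrative rather than authoritative.

import java.util.concurrent.ThreadLocalRandom;

public class PauseTimeSketch {
    // Mirrors HConstants.RETRY_BACKOFF: the multiplier applied on each attempt.
    private static final int[] RETRY_BACKOFF = { 1, 2, 3, 5, 10, 20, 40, 100, 100, 100, 100, 200, 200 };

    // Sketch of ConnectionUtils.getPauseTime: scaled pause plus up to 1% jitter.
    static long getPauseTime(long pause, int tries) {
        int ntries = Math.min(tries, RETRY_BACKOFF.length - 1);
        long normalPause = pause * RETRY_BACKOFF[ntries];
        // Up to 1% jitter so many retrying clients do not wake in lockstep.
        long jitter = (long) (normalPause * ThreadLocalRandom.current().nextFloat() * 0.01f);
        return normalPause + jitter;
    }

    public static void main(String[] args) {
        final long specialPause = 500L; // the CQTBE pause used by the test above
        for (int tries = 0; tries < 3; tries++) {
            System.out.println("attempt " + tries + " sleeps ~" + getPauseTime(specialPause, tries) + " ms");
        }
    }
}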

Example 3 with CallQueueTooBigException

Use of org.apache.hadoop.hbase.CallQueueTooBigException in project hbase by apache.

The class TestAsyncProcess, method testCallQueueTooLarge.

@Test
public void testCallQueueTooLarge() throws IOException {
    ClusterConnection conn = new MyConnectionImpl(CONF);
    AsyncProcessWithFailure ap = new AsyncProcessWithFailure(conn, CONF, new CallQueueTooBigException());
    BufferedMutatorParams bufferParam = createBufferedMutatorParams(ap, DUMMY_TABLE);
    BufferedMutatorImpl mutator = new BufferedMutatorImpl(conn, bufferParam, ap);
    Assert.assertNotNull(ap.createServerErrorTracker());
    Put p = createPut(1, true);
    mutator.mutate(p);
    try {
        mutator.flush();
        Assert.fail();
    } catch (RetriesExhaustedWithDetailsException expected) {
    }
    // Check that the server error tracker came into play and didn't make us stop immediately
    Assert.assertEquals(NB_RETRIES + 1, ap.callsCt.get());
}
Also used: CallQueueTooBigException(org.apache.hadoop.hbase.CallQueueTooBigException) Test(org.junit.Test)
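
Once retries are exhausted, RetriesExhaustedWithDetailsException carries one cause per failed mutation. A hedged sketch of unpacking it in application code, assuming a BufferedMutator obtained from a live connection:

import java.io.IOException;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
import org.apache.hadoop.hbase.util.Bytes;

public class FlushFailureSketch {
    // 'mutator' and 'put' are assumed to come from a real cluster, e.g. via
    // Connection#getBufferedMutator.
    static void flushAndReport(BufferedMutator mutator, Put put) throws IOException {
        mutator.mutate(put);
        try {
            mutator.flush();
        } catch (RetriesExhaustedWithDetailsException e) {
            // One entry per mutation that still failed after all retries.
            for (int i = 0; i < e.getNumExceptions(); i++) {
                System.err.println("row=" + Bytes.toStringBinary(e.getRow(i).getRow())
                    + ", server=" + e.getHostnamePort(i)
                    + ", cause=" + e.getCause(i));
            }
        }
    }
}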

Example 4 with CallQueueTooBigException

Use of org.apache.hadoop.hbase.CallQueueTooBigException in project hbase by apache.

The class RpcRetryingCallerImpl, method callWithRetries.

@Override
public T callWithRetries(RetryingCallable<T> callable, int callTimeout) throws IOException, RuntimeException {
    List<RetriesExhaustedException.ThrowableWithExtraContext> exceptions = new ArrayList<>();
    tracker.start();
    context.clear();
    for (int tries = 0; ; tries++) {
        long expectedSleep;
        try {
            // bad cache entries are cleared in the call to RetryingCallable#throwable() in catch block
            callable.prepare(tries != 0);
            interceptor.intercept(context.prepare(callable, tries));
            return callable.call(getTimeout(callTimeout));
        } catch (PreemptiveFastFailException e) {
            throw e;
        } catch (Throwable t) {
            Throwable e = t.getCause();
            ExceptionUtil.rethrowIfInterrupt(t);
            // translateException throws an exception when we should not retry, i.e. when the request is bad.
            interceptor.handleFailure(context, t);
            t = translateException(t);
            if (tries > startLogErrorsCnt) {
                LOG.info("Call exception, tries=" + tries + ", maxAttempts=" + maxAttempts + ", started=" + (EnvironmentEdgeManager.currentTime() - tracker.getStartTime()) + " ms ago, " + "cancelled=" + cancelled.get() + ", msg=" + t.getMessage() + " " + callable.getExceptionMessageAdditionalDetail());
            }
            callable.throwable(t, maxAttempts != 1);
            RetriesExhaustedException.ThrowableWithExtraContext qt = new RetriesExhaustedException.ThrowableWithExtraContext(t, EnvironmentEdgeManager.currentTime(), toString());
            exceptions.add(qt);
            if (tries >= maxAttempts - 1) {
                throw new RetriesExhaustedException(tries, exceptions);
            }
            // If the server is dead, we need to wait a little before retrying, to give
            // a chance to the regions to be moved
            // Get the right pause time, starting from RETRY_BACKOFF[0] * pauseBase, where pauseBase
            // may be the special value when encountering CallQueueTooBigException, see HBASE-17114
            long pauseBase = (t instanceof CallQueueTooBigException) ? pauseForCQTBE : pause;
            expectedSleep = callable.sleep(pauseBase, tries);
            // If, after the planned sleep, there won't be enough time left, we stop now.
            long duration = singleCallDuration(expectedSleep);
            if (duration > callTimeout) {
                String msg = "callTimeout=" + callTimeout + ", callDuration=" + duration + ": " + t.getMessage() + " " + callable.getExceptionMessageAdditionalDetail();
                throw (SocketTimeoutException) (new SocketTimeoutException(msg).initCause(t));
            }
        } finally {
            interceptor.updateFailureInfo(context);
        }
        try {
            if (expectedSleep > 0) {
                synchronized (cancelled) {
                    if (cancelled.get())
                        return null;
                    cancelled.wait(expectedSleep);
                }
            }
            if (cancelled.get())
                return null;
        } catch (InterruptedException e) {
            throw new InterruptedIOException("Interrupted after " + tries + " tries while maxAttempts=" + maxAttempts);
        }
    }
}
Also used: InterruptedIOException(java.io.InterruptedIOException) CallQueueTooBigException(org.apache.hadoop.hbase.CallQueueTooBigException) ArrayList(java.util.ArrayList) PreemptiveFastFailException(org.apache.hadoop.hbase.exceptions.PreemptiveFastFailException) SocketTimeoutException(java.net.SocketTimeoutException)
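
Stripped of the HBase plumbing, callWithRetries is a classify-then-back-off loop: pick a larger base pause for overload failures, and stop early when the planned sleep would push the call past its timeout. A generic sketch of the same shape, with hypothetical helper names and a deliberately simplified backoff:

import java.util.concurrent.Callable;
import java.util.function.Predicate;

public class RetrySkeletonSketch {
    // 'isOverload' would match failures like CallQueueTooBigException; the
    // linear backoff stands in for a real table such as RETRY_BACKOFF.
    static <T> T callWithRetries(Callable<T> call, int maxAttempts, long pause,
            long pauseForOverload, long callTimeoutMs,
            Predicate<Throwable> isOverload) throws Exception {
        long start = System.currentTimeMillis();
        for (int tries = 0;; tries++) {
            try {
                return call.call();
            } catch (Exception e) {
                if (tries >= maxAttempts - 1) {
                    throw e; // retries exhausted
                }
                long pauseBase = isOverload.test(e) ? pauseForOverload : pause;
                long sleep = pauseBase * (tries + 1); // simplified backoff
                // If the planned sleep would not fit in the remaining budget, stop now.
                if (System.currentTimeMillis() - start + sleep > callTimeoutMs) {
                    throw e;
                }
                Thread.sleep(sleep);
            }
        }
    }
}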

Example 5 with CallQueueTooBigException

Use of org.apache.hadoop.hbase.CallQueueTooBigException in project hbase by apache.

The class HRegionServer, method reportRegionStateTransition.

@Override
public boolean reportRegionStateTransition(final RegionStateTransitionContext context) {
    if (TEST_SKIP_REPORTING_TRANSITION) {
        return skipReportingTransition(context);
    }
    final ReportRegionStateTransitionRequest request = createReportRegionStateTransitionRequest(context);
    int tries = 0;
    long pauseTime = this.retryPauseTime;
    // Keep looping until the report succeeds; give up only once the cluster connection
    // is closed, which happens as the HRegionServer goes down.
    while (this.asyncClusterConnection != null && !this.asyncClusterConnection.isClosed()) {
        RegionServerStatusService.BlockingInterface rss = rssStub;
        try {
            if (rss == null) {
                createRegionServerStatusStub();
                continue;
            }
            ReportRegionStateTransitionResponse response = rss.reportRegionStateTransition(null, request);
            if (response.hasErrorMessage()) {
                LOG.info("TRANSITION FAILED " + request + ": " + response.getErrorMessage());
                break;
            }
            // Log if we had to retry; we want to know if we were successful after an
            // attempt that showed up in the logs as failed.
            if (tries > 0 || LOG.isTraceEnabled()) {
                LOG.info("TRANSITION REPORTED " + request);
            }
            // NOTE: Return mid-method!!!
            return true;
        } catch (ServiceException se) {
            IOException ioe = ProtobufUtil.getRemoteException(se);
            boolean pause = ioe instanceof ServerNotRunningYetException || ioe instanceof PleaseHoldException || ioe instanceof CallQueueTooBigException;
            if (pause) {
                // Do backoff else we flood the Master with requests.
                pauseTime = ConnectionUtils.getPauseTime(this.retryPauseTime, tries);
            } else {
                // Reset.
                pauseTime = this.retryPauseTime;
            }
            LOG.info("Failed report transition " + TextFormat.shortDebugString(request) + "; retry (#" + tries + ")" + (pause ? " after " + pauseTime + "ms delay (Master is coming online...)." : " immediately."), ioe);
            if (pause) {
                Threads.sleep(pauseTime);
            }
            tries++;
            if (rssStub == rss) {
                rssStub = null;
            }
        }
    }
    return false;
}
Also used: ServiceException(org.apache.hbase.thirdparty.com.google.protobuf.ServiceException) PleaseHoldException(org.apache.hadoop.hbase.PleaseHoldException) CallQueueTooBigException(org.apache.hadoop.hbase.CallQueueTooBigException) ReportRegionStateTransitionRequest(org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest) RegionServerStatusService(org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStatusService) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) ReportRegionStateTransitionResponse(org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException)
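
The report loop backs off only for failures that mean the Master needs time, and retries immediately otherwise. That classification, isolated as a sketch using the same exception types as the method above (the class and method names here are illustrative):

import java.io.IOException;
import org.apache.hadoop.hbase.CallQueueTooBigException;
import org.apache.hadoop.hbase.PleaseHoldException;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;

public class TransientFailureSketch {
    // Back off when the Master is starting up or overloaded; any other
    // IOException is retried immediately in the loop above.
    static boolean shouldPause(IOException ioe) {
        return ioe instanceof ServerNotRunningYetException
            || ioe instanceof PleaseHoldException
            || ioe instanceof CallQueueTooBigException;
    }
}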

Aggregations

CallQueueTooBigException (org.apache.hadoop.hbase.CallQueueTooBigException): 5
IOException (java.io.IOException): 3
InterruptedIOException (java.io.InterruptedIOException): 3
DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException): 2
Test (org.junit.Test): 2
SocketTimeoutException (java.net.SocketTimeoutException): 1
ArrayList (java.util.ArrayList): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 1
PleaseHoldException (org.apache.hadoop.hbase.PleaseHoldException): 1
RegionLocations (org.apache.hadoop.hbase.RegionLocations): 1
ServerName (org.apache.hadoop.hbase.ServerName): 1
TableNotFoundException (org.apache.hadoop.hbase.TableNotFoundException): 1
PreemptiveFastFailException (org.apache.hadoop.hbase.exceptions.PreemptiveFastFailException): 1
ServerNotRunningYetException (org.apache.hadoop.hbase.ipc.ServerNotRunningYetException): 1
RegionServerStoppedException (org.apache.hadoop.hbase.regionserver.RegionServerStoppedException): 1
RegionServerStatusService (org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStatusService): 1
ReportRegionStateTransitionRequest (org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest): 1
ReportRegionStateTransitionResponse (org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse): 1
RemoteException (org.apache.hadoop.ipc.RemoteException): 1