Search in sources :

Example 1 with RegionServerStoppedException

use of org.apache.hadoop.hbase.regionserver.RegionServerStoppedException in project hbase by apache.

the class ConnectionImplementation method locateRegionInMeta.

/*
 * Search the hbase:meta table for the HRegionLocation
 * info that contains the table and row we're seeking.
 */
private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, boolean useCache, boolean retry, int replicaId) throws IOException {
    // we already have the region.
    if (useCache) {
        RegionLocations locations = getCachedLocation(tableName, row);
        if (locations != null && locations.getRegionLocation(replicaId) != null) {
            return locations;
        }
    }
    // build the key of the meta region we should be looking for.
    // the extra 9's on the end are necessary to allow "exact" matches
    // without knowing the precise region names.
    byte[] metaKey = HRegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
    Scan s = new Scan();
    s.setReversed(true);
    s.withStartRow(metaKey);
    s.addFamily(HConstants.CATALOG_FAMILY);
    s.setOneRowLimit();
    if (this.useMetaReplicas) {
        s.setConsistency(Consistency.TIMELINE);
    }
    int maxAttempts = (retry ? numTries : 1);
    for (int tries = 0; true; tries++) {
        if (tries >= maxAttempts) {
            throw new NoServerForRegionException("Unable to find region for " + Bytes.toStringBinary(row) + " in " + tableName + " after " + tries + " tries.");
        }
        if (useCache) {
            RegionLocations locations = getCachedLocation(tableName, row);
            if (locations != null && locations.getRegionLocation(replicaId) != null) {
                return locations;
            }
        } else {
            // If we are not supposed to be using the cache, delete any existing cached location
            // so it won't interfere.
            metaCache.clearCache(tableName, row);
        }
        // Query the meta region
        long pauseBase = this.pause;
        try {
            Result regionInfoRow = null;
            s.resetMvccReadPoint();
            try (ReversedClientScanner rcs = new ReversedClientScanner(conf, s, TableName.META_TABLE_NAME, this, rpcCallerFactory, rpcControllerFactory, getMetaLookupPool(), 0)) {
                regionInfoRow = rcs.next();
            }
            if (regionInfoRow == null) {
                throw new TableNotFoundException(tableName);
            }
            // convert the row result into the HRegionLocation we need!
            RegionLocations locations = MetaTableAccessor.getRegionLocations(regionInfoRow);
            if (locations == null || locations.getRegionLocation(replicaId) == null) {
                throw new IOException("HRegionInfo was null in " + tableName + ", row=" + regionInfoRow);
            }
            HRegionInfo regionInfo = locations.getRegionLocation(replicaId).getRegionInfo();
            if (regionInfo == null) {
                throw new IOException("HRegionInfo was null or empty in " + TableName.META_TABLE_NAME + ", row=" + regionInfoRow);
            }
            // possible we got a region of a different table...
            if (!regionInfo.getTable().equals(tableName)) {
                throw new TableNotFoundException("Table '" + tableName + "' was not found, got: " + regionInfo.getTable() + ".");
            }
            if (regionInfo.isSplit()) {
                throw new RegionOfflineException("the only available region for" + " the required row is a split parent," + " the daughters should be online soon: " + regionInfo.getRegionNameAsString());
            }
            if (regionInfo.isOffline()) {
                throw new RegionOfflineException("the region is offline, could" + " be caused by a disable table call: " + regionInfo.getRegionNameAsString());
            }
            ServerName serverName = locations.getRegionLocation(replicaId).getServerName();
            if (serverName == null) {
                throw new NoServerForRegionException("No server address listed " + "in " + TableName.META_TABLE_NAME + " for region " + regionInfo.getRegionNameAsString() + " containing row " + Bytes.toStringBinary(row));
            }
            if (isDeadServer(serverName)) {
                throw new RegionServerStoppedException("hbase:meta says the region " + regionInfo.getRegionNameAsString() + " is managed by the server " + serverName + ", but it is dead.");
            }
            // Instantiate the location
            cacheLocation(tableName, locations);
            return locations;
        } catch (TableNotFoundException e) {
            // the table just plain doesn't exist; rethrow the error immediately
            // (this typically surfaces from the HTable constructor).
            throw e;
        } catch (IOException e) {
            ExceptionUtil.rethrowIfInterrupt(e);
            if (e instanceof RemoteException) {
                e = ((RemoteException) e).unwrapRemoteException();
            }
            if (e instanceof CallQueueTooBigException) {
                // Give a special check on CallQueueTooBigException, see #HBASE-17114
                pauseBase = this.pauseForCQTBE;
            }
            if (tries < maxAttempts - 1) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("locateRegionInMeta parentTable=" + TableName.META_TABLE_NAME + ", metaLocation=" + ", attempt=" + tries + " of " + maxAttempts + " failed; retrying after sleep of " + ConnectionUtils.getPauseTime(pauseBase, tries) + " because: " + e.getMessage());
                }
            } else {
                throw e;
            }
            // Only relocate the parent region if necessary
            if (!(e instanceof RegionOfflineException || e instanceof NoServerForRegionException)) {
                relocateRegion(TableName.META_TABLE_NAME, metaKey, replicaId);
            }
        }
        try {
            Thread.sleep(ConnectionUtils.getPauseTime(pauseBase, tries));
        } catch (InterruptedException e) {
            throw new InterruptedIOException("Giving up trying to location region in " + "meta: thread is interrupted.");
        }
    }
}
Also used : RegionLocations(org.apache.hadoop.hbase.RegionLocations) InterruptedIOException(java.io.InterruptedIOException) CallQueueTooBigException(org.apache.hadoop.hbase.CallQueueTooBigException) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) TableNotFoundException(org.apache.hadoop.hbase.TableNotFoundException) RegionServerStoppedException(org.apache.hadoop.hbase.regionserver.RegionServerStoppedException) ServerName(org.apache.hadoop.hbase.ServerName) RemoteException(org.apache.hadoop.ipc.RemoteException)
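
A minimal stand-alone sketch of the pattern that actually throws this exception above: locateRegionInMeta rejects a location read from hbase:meta when its server is on the dead-server list. This is not the HBase implementation; the DeadServerCheck class and its deadServers set are hypothetical, introduced only for illustration.

import java.io.IOException;
import java.util.Set;

import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;

final class DeadServerCheck {

    private final Set<ServerName> deadServers;

    DeadServerCheck(Set<ServerName> deadServers) {
        this.deadServers = deadServers;
    }

    // Mirrors the isDeadServer(serverName) branch in locateRegionInMeta: a location
    // pointing at a dead server is stale and must not be cached or returned.
    void verifyServerAlive(ServerName serverName, String regionName) throws IOException {
        if (serverName == null) {
            throw new IOException("No server address listed in hbase:meta for region " + regionName);
        }
        if (deadServers.contains(serverName)) {
            throw new RegionServerStoppedException("hbase:meta says the region " + regionName
                + " is managed by the server " + serverName + ", but it is dead.");
        }
    }
}

In the method above the exception is then handled like any other IOException in the retry loop: the failed attempt is logged, the meta region is relocated, and the lookup is retried after a pause.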

Example 2 with RegionServerStoppedException

use of org.apache.hadoop.hbase.regionserver.RegionServerStoppedException in project hbase by apache.

the class AssignmentManager method assign.

/**
 * Bulk assign regions to <code>destination</code>.
 * @param destination server to assign the regions to
 * @param regions Regions to assign.
 * @return true if successful
 */
boolean assign(final ServerName destination, final List<HRegionInfo> regions) throws InterruptedException {
    long startTime = EnvironmentEdgeManager.currentTime();
    try {
        int regionCount = regions.size();
        if (regionCount == 0) {
            return true;
        }
        LOG.info("Assigning " + regionCount + " region(s) to " + destination.toString());
        Set<String> encodedNames = new HashSet<>(regionCount);
        for (HRegionInfo region : regions) {
            encodedNames.add(region.getEncodedName());
        }
        List<HRegionInfo> failedToOpenRegions = new ArrayList<>();
        Map<String, Lock> locks = locker.acquireLocks(encodedNames);
        try {
            Map<String, RegionPlan> plans = new HashMap<>(regionCount);
            List<RegionState> states = new ArrayList<>(regionCount);
            for (HRegionInfo region : regions) {
                String encodedName = region.getEncodedName();
                if (!isDisabledorDisablingRegionInRIT(region)) {
                    RegionState state = forceRegionStateToOffline(region, false);
                    boolean onDeadServer = false;
                    if (state != null) {
                        if (regionStates.wasRegionOnDeadServer(encodedName)) {
                            LOG.info("Skip assigning " + region.getRegionNameAsString() + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName) + " is dead but not processed yet");
                            onDeadServer = true;
                        } else {
                            RegionPlan plan = new RegionPlan(region, state.getServerName(), destination);
                            plans.put(encodedName, plan);
                            states.add(state);
                            continue;
                        }
                    }
                    // Reassign if the region wasn't on a dead server
                    if (!onDeadServer) {
                        LOG.info("failed to force region state to offline, " + "will reassign later: " + region);
                        // assign individually later
                        failedToOpenRegions.add(region);
                    }
                }
                // Release the lock, this region is excluded from bulk assign because
                // we can't update its state, or set its znode to offline.
                Lock lock = locks.remove(encodedName);
                lock.unlock();
            }
            if (server.isStopped()) {
                return false;
            }
            // Add region plans, so we can updateTimers when one region is opened so
            // that unnecessary timeout on RIT is reduced.
            this.addPlans(plans);
            List<Pair<HRegionInfo, List<ServerName>>> regionOpenInfos = new ArrayList<>(states.size());
            for (RegionState state : states) {
                HRegionInfo region = state.getRegion();
                regionStates.updateRegionState(region, State.PENDING_OPEN, destination);
                List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
                if (shouldAssignFavoredNodes(region)) {
                    favoredNodes = server.getFavoredNodesManager().getFavoredNodesWithDNPort(region);
                }
                regionOpenInfos.add(new Pair<>(region, favoredNodes));
            }
            // Move on to open regions.
            try {
                // Send OPEN RPC. If it fails on a IOE or RemoteException,
                // regions will be assigned individually.
                Configuration conf = server.getConfiguration();
                long maxWaitTime = System.currentTimeMillis() + conf.getLong("hbase.regionserver.rpc.startup.waittime", 60000);
                for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
                    try {
                        List<RegionOpeningState> regionOpeningStateList = serverManager.sendRegionOpen(destination, regionOpenInfos);
                        for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
                            RegionOpeningState openingState = regionOpeningStateList.get(k);
                            if (openingState != RegionOpeningState.OPENED) {
                                HRegionInfo region = regionOpenInfos.get(k).getFirst();
                                LOG.info("Got opening state " + openingState + ", will reassign later: " + region);
                                // Failed opening this region, reassign it later
                                forceRegionStateToOffline(region, true);
                                failedToOpenRegions.add(region);
                            }
                        }
                        break;
                    } catch (IOException e) {
                        if (e instanceof RemoteException) {
                            e = ((RemoteException) e).unwrapRemoteException();
                        }
                        if (e instanceof RegionServerStoppedException) {
                            LOG.warn("The region server was shut down, ", e);
                            // No need to retry, the region server is a goner.
                            return false;
                        } else if (e instanceof ServerNotRunningYetException) {
                            long now = System.currentTimeMillis();
                            if (now < maxWaitTime) {
                                if (LOG.isDebugEnabled()) {
                                    LOG.debug("Server is not yet up; waiting up to " + (maxWaitTime - now) + "ms", e);
                                }
                                Thread.sleep(100);
                                // reset the try count
                                i--;
                                continue;
                            }
                        } else if (e instanceof java.net.SocketTimeoutException && this.serverManager.isServerOnline(destination)) {
                            // open the region on the same server.
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Bulk assigner openRegion() to " + destination + " has timed out, but the regions might" + " already be opened on it.", e);
                            }
                            // wait and reset the re-try count, server might be just busy.
                            Thread.sleep(100);
                            i--;
                            continue;
                        } else if (e instanceof FailedServerException && i < maximumAttempts) {
                            // In case the server is in the failed server list, no point to
                            // retry too soon. Retry after the failed_server_expiry time
                            long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
                            if (LOG.isDebugEnabled()) {
                                LOG.debug(destination + " is on failed server list; waiting " + sleepTime + "ms", e);
                            }
                            Thread.sleep(sleepTime);
                            continue;
                        }
                        throw e;
                    }
                }
            } catch (IOException e) {
                // Can be a socket timeout, EOF, NoRouteToHost, etc
                LOG.info("Unable to communicate with " + destination + " in order to assign regions, ", e);
                for (RegionState state : states) {
                    HRegionInfo region = state.getRegion();
                    forceRegionStateToOffline(region, true);
                }
                return false;
            }
        } finally {
            for (Lock lock : locks.values()) {
                lock.unlock();
            }
        }
        if (!failedToOpenRegions.isEmpty()) {
            for (HRegionInfo region : failedToOpenRegions) {
                if (!regionStates.isRegionOnline(region)) {
                    invokeAssign(region);
                }
            }
        }
        // wait for assignment completion
        ArrayList<HRegionInfo> userRegionSet = new ArrayList<>(regions.size());
        for (HRegionInfo region : regions) {
            if (!region.getTable().isSystemTable()) {
                userRegionSet.add(region);
            }
        }
        if (!waitForAssignment(userRegionSet, true, userRegionSet.size(), System.currentTimeMillis())) {
            LOG.debug("some user regions are still in transition: " + userRegionSet);
        }
        LOG.debug("Bulk assigning done for " + destination);
        return true;
    } finally {
        metricsAssignmentManager.updateBulkAssignTime(EnvironmentEdgeManager.currentTime() - startTime);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) FailedServerException(org.apache.hadoop.hbase.ipc.FailedServerException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) RegionServerStoppedException(org.apache.hadoop.hbase.regionserver.RegionServerStoppedException) HashSet(java.util.HashSet) Pair(org.apache.hadoop.hbase.util.Pair) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) ReentrantLock(java.util.concurrent.locks.ReentrantLock) Lock(java.util.concurrent.locks.Lock) ServerName(org.apache.hadoop.hbase.ServerName) RegionOpeningState(org.apache.hadoop.hbase.regionserver.RegionOpeningState) RemoteException(org.apache.hadoop.ipc.RemoteException)
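
The part of the RPC loop worth isolating is its per-exception retry policy: a RegionServerStoppedException from the destination means the bulk assign should give up immediately, while startup and failed-server conditions are only worth waiting out. A hedged sketch of that decision follows; the OpenRpcFailure class and RetryDecision enum are made-up names for illustration, not HBase API.

import java.io.IOException;

import org.apache.hadoop.hbase.ipc.FailedServerException;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
import org.apache.hadoop.ipc.RemoteException;

final class OpenRpcFailure {

    enum RetryDecision { GIVE_UP, WAIT_AND_RETRY, RETHROW }

    // Classify an exception from the OPEN RPC the way assign() does.
    static RetryDecision classify(IOException e) {
        IOException cause = (e instanceof RemoteException)
            ? ((RemoteException) e).unwrapRemoteException()
            : e;
        if (cause instanceof RegionServerStoppedException) {
            // The destination is a goner; there is no point retrying against it.
            return RetryDecision.GIVE_UP;
        }
        if (cause instanceof ServerNotRunningYetException || cause instanceof FailedServerException) {
            // Transient: the server is still starting up or recently failed; back off and retry.
            return RetryDecision.WAIT_AND_RETRY;
        }
        return RetryDecision.RETHROW;
    }
}

The real method is finer-grained: a SocketTimeoutException is also retried when the destination is still reported online, and FailedServerException is only retried while attempts remain; the sketch collapses those cases into WAIT_AND_RETRY.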

Example 3 with RegionServerStoppedException

use of org.apache.hadoop.hbase.regionserver.RegionServerStoppedException in project hbase by apache.

the class TestHCM method testClusterStatus.

// Fails too often!  Needs work.  HBASE-12558
@Ignore
@Test(expected = RegionServerStoppedException.class)
public void testClusterStatus() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    byte[] cf = "cf".getBytes();
    byte[] rk = "rk1".getBytes();
    JVMClusterUtil.RegionServerThread rs = TEST_UTIL.getHBaseCluster().startRegionServer();
    rs.waitForServerOnline();
    final ServerName sn = rs.getRegionServer().getServerName();
    Table t = TEST_UTIL.createTable(tableName, cf);
    TEST_UTIL.waitTableAvailable(tableName);
    TEST_UTIL.waitUntilNoRegionsInTransition();
    final ConnectionImplementation hci = (ConnectionImplementation) TEST_UTIL.getConnection();
    try (RegionLocator l = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
        while (l.getRegionLocation(rk).getPort() != sn.getPort()) {
            TEST_UTIL.getAdmin().move(l.getRegionLocation(rk).getRegionInfo().getEncodedNameAsBytes(), Bytes.toBytes(sn.toString()));
            TEST_UTIL.waitUntilNoRegionsInTransition();
            hci.clearRegionCache(tableName);
        }
        Assert.assertNotNull(hci.clusterStatusListener);
        TEST_UTIL.assertRegionOnServer(l.getRegionLocation(rk).getRegionInfo(), sn, 20000);
    }
    Put p1 = new Put(rk);
    p1.addColumn(cf, "qual".getBytes(), "val".getBytes());
    t.put(p1);
    rs.getRegionServer().abort("I'm dead");
    // We want the status to be updated. That's at least 10 seconds
    TEST_UTIL.waitFor(40000, 1000, true, new Waiter.Predicate<Exception>() {

        @Override
        public boolean evaluate() throws Exception {
            return TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getDeadServers().isDeadServer(sn);
        }
    });
    TEST_UTIL.waitFor(40000, 1000, true, new Waiter.Predicate<Exception>() {

        @Override
        public boolean evaluate() throws Exception {
            return hci.clusterStatusListener.isDeadServer(sn);
        }
    });
    t.close();
    // will throw an exception: RegionServerStoppedException
    hci.getClient(sn);
}
Also used : DeserializationException(org.apache.hadoop.hbase.exceptions.DeserializationException) ServerTooBusyException(org.apache.hadoop.hbase.ipc.ServerTooBusyException) RegionMovedException(org.apache.hadoop.hbase.exceptions.RegionMovedException) SocketTimeoutException(java.net.SocketTimeoutException) IOException(java.io.IOException) RegionServerStoppedException(org.apache.hadoop.hbase.regionserver.RegionServerStoppedException) TableName(org.apache.hadoop.hbase.TableName) JVMClusterUtil(org.apache.hadoop.hbase.util.JVMClusterUtil) ServerName(org.apache.hadoop.hbase.ServerName) Waiter(org.apache.hadoop.hbase.Waiter) Ignore(org.junit.Ignore) Test(org.junit.Test)
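
The test above asserts the failure through @Test(expected = RegionServerStoppedException.class). A small alternative sketch, assuming a helper placed in the org.apache.hadoop.hbase.client test package next to TestHCM (ConnectionImplementation is not visible outside that package), makes the same assertion explicit:

import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
import org.junit.Assert;

final class DeadServerAssert {

    // Fails the test unless asking for a client stub to a dead server is rejected locally.
    static void assertClientRejectsDeadServer(ConnectionImplementation conn, ServerName sn) throws Exception {
        try {
            conn.getClient(sn);
            Assert.fail("Expected RegionServerStoppedException for dead server " + sn);
        } catch (RegionServerStoppedException expected) {
            // Expected: the cluster status listener has already marked sn as dead.
        }
    }
}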

Example 4 with RegionServerStoppedException

use of org.apache.hadoop.hbase.regionserver.RegionServerStoppedException in project hbase by apache.

the class ScannerCallable method next.

private ScanResponse next() throws IOException {
    // Reset the heartbeat flag prior to each RPC in case an exception is thrown by the server
    setHeartbeatMessage(false);
    incRPCcallsMetrics();
    ScanRequest request = RequestConverter.buildScanRequest(scannerId, caching, false, nextCallSeq, this.scanMetrics != null, renew, scan.getLimit());
    try {
        ScanResponse response = getStub().scan(getRpcController(), request);
        nextCallSeq++;
        return response;
    } catch (Exception e) {
        IOException ioe = ProtobufUtil.handleRemoteException(e);
        if (logScannerActivity) {
            LOG.info("Got exception making request " + ProtobufUtil.toText(request) + " to " + getLocation(), e);
        }
        if (logScannerActivity) {
            if (ioe instanceof UnknownScannerException) {
                try {
                    HRegionLocation location = getConnection().relocateRegion(getTableName(), scan.getStartRow());
                    LOG.info("Scanner=" + scannerId + " expired, current region location is " + location.toString());
                } catch (Throwable t) {
                    LOG.info("Failed to relocate region", t);
                }
            } else if (ioe instanceof ScannerResetException) {
                LOG.info("Scanner=" + scannerId + " has received an exception, and the server " + "asked us to reset the scanner state.", ioe);
            }
        }
        // The exception translation below is convoluted (yeah, hard to follow and in need of a refactor).
        if (ioe instanceof NotServingRegionException) {
            // Attach NSRE to signal client that it needs to re-setup scanner.
            if (this.scanMetrics != null) {
                this.scanMetrics.countOfNSRE.incrementAndGet();
            }
            throw new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe);
        } else if (ioe instanceof RegionServerStoppedException) {
            // Throw a DoNotRetryIOException so we break out of the retry cycle and instead
            // open a scanner against the new location.
            throw new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe);
        } else {
            // The outer layers will retry
            throw ioe;
        }
    }
}
Also used : ScanRequest(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ScanRequest) RegionServerStoppedException(org.apache.hadoop.hbase.regionserver.RegionServerStoppedException) HRegionLocation(org.apache.hadoop.hbase.HRegionLocation) NotServingRegionException(org.apache.hadoop.hbase.NotServingRegionException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) ScannerResetException(org.apache.hadoop.hbase.exceptions.ScannerResetException) ScanResponse(org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ScanResponse) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) UnknownHostException(java.net.UnknownHostException) UnknownScannerException(org.apache.hadoop.hbase.UnknownScannerException)
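
The decision next() encodes can be summarized on its own: exceptions that invalidate the open scanner are wrapped in DoNotRetryIOException so the client scanner rebuilds itself, while anything else is rethrown for the normal retry path. The sketch below illustrates that policy; the ScanFailurePolicy class is a hypothetical name, not part of HBase.

import java.io.IOException;

import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;

final class ScanFailurePolicy {

    // Decide what to throw for an exception received while calling next() on a scanner.
    static IOException toThrow(IOException ioe) {
        if (ioe instanceof NotServingRegionException || ioe instanceof RegionServerStoppedException) {
            // Region moved or its server died: the scanner id is useless, force a re-setup.
            return new DoNotRetryIOException("Resetting the scanner -- see exception cause", ioe);
        }
        // Anything else goes back to the outer retry machinery unchanged.
        return ioe;
    }
}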

Aggregations

IOException (java.io.IOException)4
RegionServerStoppedException (org.apache.hadoop.hbase.regionserver.RegionServerStoppedException)4
ServerName (org.apache.hadoop.hbase.ServerName)3
InterruptedIOException (java.io.InterruptedIOException)2
DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException)2
HBaseIOException (org.apache.hadoop.hbase.HBaseIOException)2
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)2
RemoteException (org.apache.hadoop.ipc.RemoteException)2
SocketTimeoutException (java.net.SocketTimeoutException)1
UnknownHostException (java.net.UnknownHostException)1
ArrayList (java.util.ArrayList)1
HashMap (java.util.HashMap)1
HashSet (java.util.HashSet)1
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1
CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)1
Lock (java.util.concurrent.locks.Lock)1
ReentrantLock (java.util.concurrent.locks.ReentrantLock)1
Configuration (org.apache.hadoop.conf.Configuration)1
CallQueueTooBigException (org.apache.hadoop.hbase.CallQueueTooBigException)1
HRegionLocation (org.apache.hadoop.hbase.HRegionLocation)1