Search in sources :

Example 21 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class MasterMetaBootstrap method getPreviouselyFailedMetaServersFromZK.

/**
 * Collects the server names recorded in ZooKeeper as children of the recovering-regions znode
 * that belongs to the first hbase:meta region.
 *
 * @return the parsed server names of that znode's children; an empty set when the child listing
 *         comes back {@code null}
 * @throws KeeperException declared for the ZooKeeper child listing performed here
 */
private Set<ServerName> getPreviouselyFailedMetaServersFromZK() throws KeeperException {
    final ZooKeeperWatcher zkw = master.getZooKeeper();
    // Path layout: <recovering-regions znode>/<encoded name of the first meta region>
    final String metaZNode = ZKUtil.joinZNode(
        zkw.znodePaths.recoveringRegionsZNode,
        HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
    final List<String> children = ZKUtil.listChildrenNoWatch(zkw, metaZNode);
    final Set<ServerName> failedServers = new HashSet<>();
    if (children == null) {
        // Nothing was listed for the znode: report no failed servers.
        return failedServers;
    }
    // Each child name is a serialized server name.
    for (String child : children) {
        failedServers.add(ServerName.parseServerName(child));
    }
    return failedServers;
}
Also used : ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) ServerName(org.apache.hadoop.hbase.ServerName) HashSet(java.util.HashSet)

Example 22 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class HMaster method move.

/**
 * Moves a region to another server. Public so can be accessed by tests.
 *
 * @param encodedRegionName encoded name of the region to move
 * @param destServerName server name (as bytes) to move the region to; when {@code null} or
 *        empty, the balancer picks a destination from the list supplied by the server manager
 * @throws UnknownRegionException if no region state is found for {@code encodedRegionName}
 * @throws HBaseIOException if any step of the move fails with an {@link IOException}; an
 *         {@code HBaseIOException} is rethrown unchanged, other I/O failures are wrapped
 */
@VisibleForTesting
public void move(final byte[] encodedRegionName, final byte[] destServerName) throws HBaseIOException {
    final RegionState regionState =
        assignmentManager.getRegionStates().getRegionState(Bytes.toString(encodedRegionName));
    if (regionState == null) {
        throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
    }
    final HRegionInfo hri = regionState.getRegion();

    // Build the candidate list: either whatever the server manager offers, or the one
    // server the caller asked for.
    final boolean noDestinationGiven = destServerName == null || destServerName.length == 0;
    final List<ServerName> candidates;
    if (noDestinationGiven) {
        LOG.info("Passed destination servername is null/empty so choosing a server at random");
        candidates = this.serverManager.createDestinationServersList(regionState.getServerName());
    } else {
        candidates = Lists.newArrayList(ServerName.valueOf(Bytes.toString(destServerName)));
    }

    final ServerName dest = balancer.randomAssignment(hri, candidates);
    if (dest == null) {
        LOG.debug("Unable to determine a plan to assign " + hri);
        return;
    }

    // Only applies to an explicitly requested destination.
    // To avoid unnecessary region moving later by balancer. Don't put user
    // regions on master. Regions on master could be put on other region
    // server intentionally by test however.
    if (!noDestinationGiven
            && dest.equals(serverName)
            && balancer instanceof BaseLoadBalancer
            && !((BaseLoadBalancer) balancer).shouldBeOnMaster(hri)) {
        LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
            + " to avoid unnecessary region moving later by load balancer,"
            + " because it should not be on master");
        return;
    }

    if (dest.equals(regionState.getServerName())) {
        LOG.debug("Skipping move of region " + hri.getRegionNameAsString()
            + " because region already assigned to the same server " + dest + ".");
        return;
    }

    // Now we can do the move
    final RegionPlan plan = new RegionPlan(hri, regionState.getServerName(), dest);
    try {
        checkInitialized();
        // A coprocessor pre-hook returning true cancels the move.
        if (this.cpHost != null && this.cpHost.preMove(hri, plan.getSource(), plan.getDestination())) {
            return;
        }
        // warmup the region on the destination before initiating the move. this call
        // is synchronous and takes some time. doing it before the source region gets
        // closed
        serverManager.sendRegionWarmup(plan.getDestination(), hri);
        LOG.info(getClientIdAuditPrefix() + " move " + plan + ", running balancer");
        this.assignmentManager.balance(plan);
        if (this.cpHost != null) {
            this.cpHost.postMove(hri, plan.getSource(), plan.getDestination());
        }
    } catch (HBaseIOException hbaseFailure) {
        // Already the declared exception type: pass it through untouched.
        throw hbaseFailure;
    } catch (IOException otherFailure) {
        throw new HBaseIOException(otherFailure);
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) UnknownRegionException(org.apache.hadoop.hbase.UnknownRegionException) ServerName(org.apache.hadoop.hbase.ServerName) BaseLoadBalancer(org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 23 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class AssignmentManager method assign.

/**
 * Bulk assign regions to <code>destination</code>.
 * <p>
 * Outline of what the method does:
 * <ol>
 * <li>acquires one lock per encoded region name;</li>
 * <li>forces each eligible region to offline and builds a {@code RegionPlan} for it; regions
 * that cannot be planned have their lock released right away;</li>
 * <li>marks the planned regions {@code PENDING_OPEN} and sends them to the destination in a
 * single open call, retrying on selected exceptions;</li>
 * <li>hands regions that could not be forced offline, or that the server did not report as
 * {@code OPENED}, to {@code invokeAssign} individually (unless already online);</li>
 * <li>waits on the regions that do not belong to system tables via
 * {@code waitForAssignment};</li>
 * <li>always records the elapsed time through
 * {@code metricsAssignmentManager.updateBulkAssignTime}.</li>
 * </ol>
 * @param destination server the open request is sent to
 * @param regions Regions to assign.
 * @return {@code true} if {@code regions} is empty or the method ran through to the end (some
 *         regions may still be in transition); {@code false} if {@code server.isStopped()}
 *         after planning, if the destination reported {@code RegionServerStoppedException},
 *         or if an {@code IOException} escaped the retry loop
 * @throws InterruptedException declared by the method; {@code Thread.sleep} between retries
 *         is one visible source
 */
boolean assign(final ServerName destination, final List<HRegionInfo> regions) throws InterruptedException {
    long startTime = EnvironmentEdgeManager.currentTime();
    try {
        int regionCount = regions.size();
        if (regionCount == 0) {
            // Nothing to do counts as success.
            return true;
        }
        LOG.info("Assigning " + regionCount + " region(s) to " + destination.toString());
        // Encoded names are the keys under which the per-region locks are acquired.
        Set<String> encodedNames = new HashSet<>(regionCount);
        for (HRegionInfo region : regions) {
            encodedNames.add(region.getEncodedName());
        }
        // Regions that drop out of the bulk path and are assigned one by one at the end.
        List<HRegionInfo> failedToOpenRegions = new ArrayList<>();
        Map<String, Lock> locks = locker.acquireLocks(encodedNames);
        try {
            Map<String, RegionPlan> plans = new HashMap<>(regionCount);
            List<RegionState> states = new ArrayList<>(regionCount);
            for (HRegionInfo region : regions) {
                String encodedName = region.getEncodedName();
                if (!isDisabledorDisablingRegionInRIT(region)) {
                    RegionState state = forceRegionStateToOffline(region, false);
                    boolean onDeadServer = false;
                    if (state != null) {
                        if (regionStates.wasRegionOnDeadServer(encodedName)) {
                            LOG.info("Skip assigning " + region.getRegionNameAsString() + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName) + " is dead but not processed yet");
                            onDeadServer = true;
                        } else {
                            RegionPlan plan = new RegionPlan(region, state.getServerName(), destination);
                            plans.put(encodedName, plan);
                            states.add(state);
                            // Planned region: skip the lock release below so its lock stays
                            // held until the outer finally block.
                            continue;
                        }
                    }
                    // Reassign if the region wasn't on a dead server
                    if (!onDeadServer) {
                        LOG.info("failed to force region state to offline, " + "will reassign later: " + region);
                        // assign individually later
                        failedToOpenRegions.add(region);
                    }
                }
                // Release the lock, this region is excluded from bulk assign because
                // we can't update its state, or set its znode to offline.
                // Removing it from the map also keeps the finally block from unlocking it twice.
                Lock lock = locks.remove(encodedName);
                lock.unlock();
            }
            if (server.isStopped()) {
                return false;
            }
            // Add region plans, so we can updateTimers when one region is opened so
            // that unnecessary timeout on RIT is reduced.
            this.addPlans(plans);
            // One (region, favored nodes) pair per planned region, in the same order as states.
            List<Pair<HRegionInfo, List<ServerName>>> regionOpenInfos = new ArrayList<>(states.size());
            for (RegionState state : states) {
                HRegionInfo region = state.getRegion();
                regionStates.updateRegionState(region, State.PENDING_OPEN, destination);
                List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
                if (shouldAssignFavoredNodes(region)) {
                    favoredNodes = server.getFavoredNodesManager().getFavoredNodesWithDNPort(region);
                }
                regionOpenInfos.add(new Pair<>(region, favoredNodes));
            }
            // Move on to open regions.
            try {
                // Send OPEN RPC. If it fails on a IOE or RemoteException,
                // regions will be assigned individually.
                Configuration conf = server.getConfiguration();
                // Deadline for tolerating ServerNotRunningYetException from the destination.
                long maxWaitTime = System.currentTimeMillis() + conf.getLong("hbase.regionserver.rpc.startup.waittime", 60000);
                // i counts attempts; some handlers below decrement it so that their retry
                // does not consume an attempt.
                for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
                    try {
                        List<RegionOpeningState> regionOpeningStateList = serverManager.sendRegionOpen(destination, regionOpenInfos);
                        // The response list is read positionally against regionOpenInfos.
                        for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
                            RegionOpeningState openingState = regionOpeningStateList.get(k);
                            if (openingState != RegionOpeningState.OPENED) {
                                HRegionInfo region = regionOpenInfos.get(k).getFirst();
                                LOG.info("Got opening state " + openingState + ", will reassign later: " + region);
                                // Failed opening this region, reassign it later
                                forceRegionStateToOffline(region, true);
                                failedToOpenRegions.add(region);
                            }
                        }
                        // The RPC returned normally: stop retrying.
                        break;
                    } catch (IOException e) {
                        if (e instanceof RemoteException) {
                            // Classify by the exception the remote side wrapped.
                            e = ((RemoteException) e).unwrapRemoteException();
                        }
                        if (e instanceof RegionServerStoppedException) {
                            LOG.warn("The region server was shut down, ", e);
                            // No need to retry, the region server is a goner.
                            return false;
                        } else if (e instanceof ServerNotRunningYetException) {
                            long now = System.currentTimeMillis();
                            if (now < maxWaitTime) {
                                if (LOG.isDebugEnabled()) {
                                    LOG.debug("Server is not yet up; waiting up to " + (maxWaitTime - now) + "ms", e);
                                }
                                Thread.sleep(100);
                                // reset the try count
                                i--;
                                continue;
                            }
                            // Past the deadline: fall through to the rethrow below.
                        } else if (e instanceof java.net.SocketTimeoutException && this.serverManager.isServerOnline(destination)) {
                            // The call timed out but the destination is still reported online:
                            // retry the open on the same server.
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Bulk assigner openRegion() to " + destination + " has timed out, but the regions might" + " already be opened on it.", e);
                            }
                            // wait and reset the re-try count, server might be just busy.
                            Thread.sleep(100);
                            i--;
                            continue;
                        } else if (e instanceof FailedServerException && i < maximumAttempts) {
                            // In case the server is in the failed server list, no point to
                            // retry too soon. Retry after the failed_server_expiry time
                            long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
                            if (LOG.isDebugEnabled()) {
                                LOG.debug(destination + " is on failed server list; waiting " + sleepTime + "ms", e);
                            }
                            Thread.sleep(sleepTime);
                            // No i-- here: this retry does consume an attempt.
                            continue;
                        }
                        // Anything not handled above goes to the outer catch.
                        throw e;
                    }
                }
            } catch (IOException e) {
                // Can be a socket timeout, EOF, NoRouteToHost, etc
                LOG.info("Unable to communicate with " + destination + " in order to assign regions, ", e);
                // Put every planned region back to offline before giving up.
                for (RegionState state : states) {
                    HRegionInfo region = state.getRegion();
                    forceRegionStateToOffline(region, true);
                }
                return false;
            }
        } finally {
            // Unlock whatever is still in the map, i.e. the locks not released in the planning loop.
            for (Lock lock : locks.values()) {
                lock.unlock();
            }
        }
        // Individually assign the regions that dropped out, unless they are online already.
        if (!failedToOpenRegions.isEmpty()) {
            for (HRegionInfo region : failedToOpenRegions) {
                if (!regionStates.isRegionOnline(region)) {
                    invokeAssign(region);
                }
            }
        }
        // wait for assignment completion
        // Only regions whose table is not a system table are waited on.
        ArrayList<HRegionInfo> userRegionSet = new ArrayList<>(regions.size());
        for (HRegionInfo region : regions) {
            if (!region.getTable().isSystemTable()) {
                userRegionSet.add(region);
            }
        }
        if (!waitForAssignment(userRegionSet, true, userRegionSet.size(), System.currentTimeMillis())) {
            LOG.debug("some user regions are still in transition: " + userRegionSet);
        }
        LOG.debug("Bulk assigning done for " + destination);
        return true;
    } finally {
        // Runs on every exit path, including early returns and exceptions.
        metricsAssignmentManager.updateBulkAssignTime(EnvironmentEdgeManager.currentTime() - startTime);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) FailedServerException(org.apache.hadoop.hbase.ipc.FailedServerException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) RegionServerStoppedException(org.apache.hadoop.hbase.regionserver.RegionServerStoppedException) HashSet(java.util.HashSet) Pair(org.apache.hadoop.hbase.util.Pair) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) ReentrantLock(java.util.concurrent.locks.ReentrantLock) Lock(java.util.concurrent.locks.Lock) ServerName(org.apache.hadoop.hbase.ServerName) RegionOpeningState(org.apache.hadoop.hbase.regionserver.RegionOpeningState) RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 24 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class AssignmentManager method processRegionsInTransition.

/**
 * Processes list of regions in transition at startup.
 * <p>
 * A region whose recorded server is set but missing from the online servers is skipped.
 * Every other region is dispatched on its current state: {@code CLOSED} regions are assigned,
 * {@code PENDING_OPEN} / {@code PENDING_CLOSE} have their open / close request re-sent, and
 * {@code FAILED_OPEN} / {@code FAILED_CLOSE} are unassigned. All other states are ignored.
 *
 * @param regionsInTransition the region states to walk through
 */
void processRegionsInTransition(Collection<RegionState> regionsInTransition) {
    for (RegionState rit : regionsInTransition) {
        LOG.info("Processing " + rit);

        // A region with no recorded server is still processed below; only a recorded
        // server that is not online makes us skip the region.
        final ServerName hostingServer = rit.getServerName();
        final boolean hostIsOffline =
            hostingServer != null && !serverManager.getOnlineServers().containsKey(hostingServer);
        if (hostIsOffline) {
            LOG.info("Server " + hostingServer + " isn't online. SSH will handle this");
            // SSH will handle it
            continue;
        }

        final HRegionInfo hri = rit.getRegion();
        final RegionState.State current = rit.getState();
        switch (current) {
            case CLOSED:
                invokeAssign(rit.getRegion());
                break;
            case PENDING_OPEN:
                retrySendRegionOpen(rit);
                break;
            case PENDING_CLOSE:
                retrySendRegionClose(rit);
                break;
            case FAILED_CLOSE:
            case FAILED_OPEN:
                invokeUnAssign(hri);
                break;
            default:
                // No process for other states
                break;
        }
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName) State(org.apache.hadoop.hbase.master.RegionState.State)

Example 25 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class AssignmentManager method cleanOutCrashedServerReferences.

/**
 * Clean out crashed server removing any assignments.
 * <p>
 * First drops every region plan whose destination is {@code sn}. Then takes the regions
 * returned by {@code regionStates.serverOffline(sn)} and, under each region's lock, either
 * removes the region from that list or marks it {@code OFFLINE} and keeps it.
 *
 * @param sn Server that went down.
 * @return list of regions in transition on this server (the list from
 *         {@code serverOffline}, minus the entries removed here)
 */
public List<HRegionInfo> cleanOutCrashedServerReferences(final ServerName sn) {
    // Clean out any existing assignment plans for this server
    synchronized (this.regionPlans) {
        final Iterator<Map.Entry<String, RegionPlan>> planIter = this.regionPlans.entrySet().iterator();
        while (planIter.hasNext()) {
            final ServerName plannedDest = planIter.next().getValue().getDestination();
            // The name will be null if the region is planned for a random assign.
            if (plannedDest != null && plannedDest.equals(sn)) {
                // Use iterator's remove else we'll get CME
                planIter.remove();
            }
        }
    }

    final List<HRegionInfo> rits = regionStates.serverOffline(sn);
    final Iterator<HRegionInfo> ritIter = rits.iterator();
    while (ritIter.hasNext()) {
        final HRegionInfo hri = ritIter.next();
        final String encodedName = hri.getEncodedName();
        // We need a lock on the region as we could update it
        final Lock regionLock = locker.acquireLock(encodedName);
        try {
            final RegionState transition = regionStates.getRegionTransitionState(encodedName);
            // Still relevant only if a transition exists, it is either unowned or owned by
            // the dead server, and it is in one of the listed states.
            final boolean stillOnDeadServer = transition != null
                && (transition.getServerName() == null || transition.isOnServer(sn))
                && RegionStates.isOneOfStates(transition, State.PENDING_OPEN, State.OPENING,
                    State.FAILED_OPEN, State.FAILED_CLOSE, State.OFFLINE);
            if (!stillOnDeadServer) {
                LOG.info("Skip " + transition + " since it is not opening/failed_close"
                    + " on the dead server any more: " + sn);
                ritIter.remove();
            } else if (tableStateManager.isTableState(hri.getTable(), TableState.State.DISABLED,
                    TableState.State.DISABLING)) {
                // Table is disabled/disabling: take the region offline and drop it from the result.
                regionStates.regionOffline(hri);
                ritIter.remove();
            } else {
                // Mark the region offline and assign it again by SSH
                regionStates.updateRegionState(hri, State.OFFLINE);
            }
        } finally {
            regionLock.unlock();
        }
    }
    return rits;
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) NavigableMap(java.util.NavigableMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) ReentrantLock(java.util.concurrent.locks.ReentrantLock) Lock(java.util.concurrent.locks.Lock)

Aggregations

ServerName (org.apache.hadoop.hbase.ServerName)426 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)202 Test (org.junit.Test)163 ArrayList (java.util.ArrayList)97 TableName (org.apache.hadoop.hbase.TableName)89 IOException (java.io.IOException)87 HashMap (java.util.HashMap)81 List (java.util.List)72 Map (java.util.Map)54 HRegionLocation (org.apache.hadoop.hbase.HRegionLocation)45 HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor)34 Table (org.apache.hadoop.hbase.client.Table)33 HashSet (java.util.HashSet)32 TreeMap (java.util.TreeMap)31 HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor)29 Configuration (org.apache.hadoop.conf.Configuration)26 HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer)26 Pair (org.apache.hadoop.hbase.util.Pair)24 KeeperException (org.apache.zookeeper.KeeperException)23 InterruptedIOException (java.io.InterruptedIOException)22