Search in sources :

Example 21 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class MasterMetaBootstrap method getPreviouselyFailedMetaServersFromZK.

/**
 * Returns the set of region server names found under the hbase:meta recovering-region ZK node.
 * @return Set of meta server names which were recorded in ZK
 */
private Set<ServerName> getPreviouselyFailedMetaServersFromZK() throws KeeperException {
    final ZooKeeperWatcher zooKeeper = master.getZooKeeper();
    Set<ServerName> result = new HashSet<>();
    String metaRecoveringZNode = ZKUtil.joinZNode(zooKeeper.znodePaths.recoveringRegionsZNode, HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
    List<String> regionFailedServers = ZKUtil.listChildrenNoWatch(zooKeeper, metaRecoveringZNode);
    if (regionFailedServers == null)
        return result;
    for (String failedServer : regionFailedServers) {
        ServerName server = ServerName.parseServerName(failedServer);
    return result;
Also used : ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) ServerName(org.apache.hadoop.hbase.ServerName) HashSet(java.util.HashSet)

Example 22 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class HMaster method move.

// Public so can be accessed by tests.
// Moves the region identified by encodedRegionName to the server named by destServerName.
// When destServerName is null or empty, the balancer is asked for a random destination from
// the servers returned by createDestinationServersList. Throws UnknownRegionException when no
// RegionState is known for the encoded name; any IOException raised during the move is
// rethrown as an HBaseIOException (wrapped when it is not one already).
// NOTE(review): this excerpt is not compilable as shown -- closing braces and, presumably,
// some statements (for example early returns after the "Unable to determine"/"Skipping move"
// log lines) were lost during extraction. Confirm against the upstream HMaster source.
public void move(final byte[] encodedRegionName, final byte[] destServerName) throws HBaseIOException {
    RegionState regionState = assignmentManager.getRegionStates().getRegionState(Bytes.toString(encodedRegionName));
    HRegionInfo hri;
    if (regionState != null) {
        hri = regionState.getRegion();
    } else {
        throw new UnknownRegionException(Bytes.toStringBinary(encodedRegionName));
    ServerName dest;
    // NOTE(review): the string literal on the next line has lost its call prefix
    // (presumably a logging call) -- confirm upstream.
    if (destServerName == null || destServerName.length == 0) {"Passed destination servername is null/empty so " + "choosing a server at random");
        // No explicit destination: let the balancer choose among the candidate servers.
        final List<ServerName> destServers = this.serverManager.createDestinationServersList(regionState.getServerName());
        dest = balancer.randomAssignment(hri, destServers);
        if (dest == null) {
            LOG.debug("Unable to determine a plan to assign " + hri);
    } else {
        // Explicit destination: still routed through the balancer with a single-candidate list.
        ServerName candidate = ServerName.valueOf(Bytes.toString(destServerName));
        dest = balancer.randomAssignment(hri, Lists.newArrayList(candidate));
        if (dest == null) {
            LOG.debug("Unable to determine a plan to assign " + hri);
        if (dest.equals(serverName) && balancer instanceof BaseLoadBalancer && !((BaseLoadBalancer) balancer).shouldBeOnMaster(hri)) {
            // To avoid unnecessary region moving later by balancer. Don't put user
            // regions on master. Regions on master could be put on other region
            // server intentionally by test however.
            LOG.debug("Skipping move of region " + hri.getRegionNameAsString() + " to avoid unnecessary region moving later by load balancer," + " because it should not be on master");
    // Destination equals the region's current server: only a debug message is visible here.
    if (dest.equals(regionState.getServerName())) {
        LOG.debug("Skipping move of region " + hri.getRegionNameAsString() + " because region already assigned to the same server " + dest + ".");
    // Now we can do the move
    RegionPlan rp = new RegionPlan(hri, regionState.getServerName(), dest);
    try {
        // The coprocessor host, when present, is consulted before the move (preMove) and
        // notified after it (postMove).
        if (this.cpHost != null) {
            if (this.cpHost.preMove(hri, rp.getSource(), rp.getDestination())) {
        // warmup the region on the destination before initiating the move. this call
        // is synchronous and takes some time. doing it before the source region gets
        // closed
        // NOTE(review): the text after the first ';' on the next line is the tail of a
        // statement whose beginning was lost in extraction -- confirm upstream.
        serverManager.sendRegionWarmup(rp.getDestination(), hri); + " move " + rp + ", running balancer");
        if (this.cpHost != null) {
            this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
    } catch (IOException ioe) {
        // Preserve HBaseIOException as-is; wrap every other IOException.
        if (ioe instanceof HBaseIOException) {
            throw (HBaseIOException) ioe;
        throw new HBaseIOException(ioe);
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) UnknownRegionException(org.apache.hadoop.hbase.UnknownRegionException) ServerName(org.apache.hadoop.hbase.ServerName) BaseLoadBalancer(org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 23 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class AssignmentManager method assign.

/**
 * Bulk assign regions to <code>destination</code>.
 * @param destination Server the regions are assigned to.
 * @param regions Regions to assign.
 * @return true if successful
 */
// Bulk-assigns the given regions to a single destination server by sending one open request
// (serverManager.sendRegionOpen) for all of them, retrying up to maximumAttempts times.
// Returns true immediately for an empty region list. Returns false when this server is
// stopped, when the destination reports RegionServerStoppedException, or when the open
// request ultimately fails with an IOException. The elapsed time is always reported to
// metricsAssignmentManager in the outer finally block.
// NOTE(review): this excerpt is not compilable as shown -- many closing braces, several call
// prefixes in front of log-message literals, and some statements were lost during
// extraction. Confirm every NOTE below against the upstream AssignmentManager source.
boolean assign(final ServerName destination, final List<HRegionInfo> regions) throws InterruptedException {
    long startTime = EnvironmentEdgeManager.currentTime();
    try {
        int regionCount = regions.size();
        if (regionCount == 0) {
            return true;
        // NOTE(review): the literal after '}' on the next line has lost its call prefix
        // (presumably a logging call).
        }"Assigning " + regionCount + " region(s) to " + destination.toString());
        Set<String> encodedNames = new HashSet<>(regionCount);
        // NOTE(review): the loop body is missing; presumably it fills encodedNames, which is
        // what the lock acquisition below is keyed on.
        for (HRegionInfo region : regions) {
        List<HRegionInfo> failedToOpenRegions = new ArrayList<>();
        Map<String, Lock> locks = locker.acquireLocks(encodedNames);
        try {
            Map<String, RegionPlan> plans = new HashMap<>(regionCount);
            List<RegionState> states = new ArrayList<>(regionCount);
            for (HRegionInfo region : regions) {
                String encodedName = region.getEncodedName();
                if (!isDisabledorDisablingRegionInRIT(region)) {
                    RegionState state = forceRegionStateToOffline(region, false);
                    boolean onDeadServer = false;
                    if (state != null) {
                        if (regionStates.wasRegionOnDeadServer(encodedName)) {
                            // NOTE(review): call prefix of the following message was stripped.
                  "Skip assigning " + region.getRegionNameAsString() + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName) + " is dead but not processed yet");
                            onDeadServer = true;
                        } else {
                            RegionPlan plan = new RegionPlan(region, state.getServerName(), destination);
                            plans.put(encodedName, plan);
                    // Reassign if the region wasn't on a dead server
                    if (!onDeadServer) {
                        // NOTE(review): call prefix of the following message was stripped.
              "failed to force region state to offline, " + "will reassign later: " + region);
                        // assign individually later
                // Release the lock, this region is excluded from bulk assign because
                // we can't update its state, or set its znode to offline.
                Lock lock = locks.remove(encodedName);
            if (server.isStopped()) {
                return false;
            // Add region plans, so we can updateTimers when one region is opened so
            // that unnecessary timeout on RIT is reduced.
            List<Pair<HRegionInfo, List<ServerName>>> regionOpenInfos = new ArrayList<>(states.size());
            // Mark every region PENDING_OPEN on the destination and pair it with its favored
            // nodes (an empty list unless shouldAssignFavoredNodes says otherwise).
            for (RegionState state : states) {
                HRegionInfo region = state.getRegion();
                regionStates.updateRegionState(region, State.PENDING_OPEN, destination);
                List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
                if (shouldAssignFavoredNodes(region)) {
                    favoredNodes = server.getFavoredNodesManager().getFavoredNodesWithDNPort(region);
                regionOpenInfos.add(new Pair<>(region, favoredNodes));
            // Move on to open regions.
            try {
                // Send OPEN RPC. If it fails on a IOE or RemoteException,
                // regions will be assigned individually.
                Configuration conf = server.getConfiguration();
                // Deadline for waiting on a destination that is not running yet; the wait
                // budget defaults to 60000 ms.
                long maxWaitTime = System.currentTimeMillis() + conf.getLong("hbase.regionserver.rpc.startup.waittime", 60000);
                for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
                    try {
                        List<RegionOpeningState> regionOpeningStateList = serverManager.sendRegionOpen(destination, regionOpenInfos);
                        // The response list is index-aligned with regionOpenInfos.
                        for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
                            RegionOpeningState openingState = regionOpeningStateList.get(k);
                            if (openingState != RegionOpeningState.OPENED) {
                                HRegionInfo region = regionOpenInfos.get(k).getFirst();
                                // NOTE(review): call prefix of the following message was stripped.
                      "Got opening state " + openingState + ", will reassign later: " + region);
                                // Failed opening this region, reassign it later
                                forceRegionStateToOffline(region, true);
                    } catch (IOException e) {
                        // Unwrap remote failures so the instanceof checks below see the real cause.
                        if (e instanceof RemoteException) {
                            e = ((RemoteException) e).unwrapRemoteException();
                        if (e instanceof RegionServerStoppedException) {
                            LOG.warn("The region server was shut down, ", e);
                            // No need to retry, the region server is a goner.
                            return false;
                        } else if (e instanceof ServerNotRunningYetException) {
                            long now = System.currentTimeMillis();
                            if (now < maxWaitTime) {
                                if (LOG.isDebugEnabled()) {
                                    LOG.debug("Server is not yet up; waiting up to " + (maxWaitTime - now) + "ms", e);
                                // reset the try count
                        // NOTE(review): the type after 'instanceof' on the next line is
                        // missing; the log message below suggests a timeout exception.
                        } else if (e instanceof && this.serverManager.isServerOnline(destination)) {
                            // open the region on the same server.
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Bulk assigner openRegion() to " + destination + " has timed out, but the regions might" + " already be opened on it.", e);
                            // wait and reset the re-try count, server might be just busy.
                        } else if (e instanceof FailedServerException && i < maximumAttempts) {
                            // In case the server is in the failed server list, no point to
                            // retry too soon. Retry after the failed_server_expiry time
                            long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
                            if (LOG.isDebugEnabled()) {
                                LOG.debug(destination + " is on failed server list; waiting " + sleepTime + "ms", e);
                        throw e;
            } catch (IOException e) {
                // Can be a socket timeout, EOF, NoRouteToHost, etc
                // NOTE(review): call prefix of the following message was stripped.
      "Unable to communicate with " + destination + " in order to assign regions, ", e);
                // Force every region of this batch back to offline before giving up.
                for (RegionState state : states) {
                    HRegionInfo region = state.getRegion();
                    forceRegionStateToOffline(region, true);
                return false;
        } finally {
            // NOTE(review): loop body missing; presumably each remaining lock is released here.
            for (Lock lock : locks.values()) {
        // NOTE(review): the bodies of the next two nested blocks are missing.
        if (!failedToOpenRegions.isEmpty()) {
            for (HRegionInfo region : failedToOpenRegions) {
                if (!regionStates.isRegionOnline(region)) {
        // wait for assignment completion
        ArrayList<HRegionInfo> userRegionSet = new ArrayList<>(regions.size());
        for (HRegionInfo region : regions) {
            if (!region.getTable().isSystemTable()) {
        if (!waitForAssignment(userRegionSet, true, userRegionSet.size(), System.currentTimeMillis())) {
            LOG.debug("some user regions are still in transition: " + userRegionSet);
        LOG.debug("Bulk assigning done for " + destination);
        return true;
    } finally {
        // Always record how long the bulk assign took, whatever the outcome.
        metricsAssignmentManager.updateBulkAssignTime(EnvironmentEdgeManager.currentTime() - startTime);
Also used : Configuration(org.apache.hadoop.conf.Configuration) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) FailedServerException(org.apache.hadoop.hbase.ipc.FailedServerException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) RegionServerStoppedException(org.apache.hadoop.hbase.regionserver.RegionServerStoppedException) HashSet(java.util.HashSet) Pair(org.apache.hadoop.hbase.util.Pair) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) ReentrantLock(java.util.concurrent.locks.ReentrantLock) Lock(java.util.concurrent.locks.Lock) ServerName(org.apache.hadoop.hbase.ServerName) RegionOpeningState(org.apache.hadoop.hbase.regionserver.RegionOpeningState) RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 24 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class AssignmentManager method processRegionsInTransition.

/**
 * Processes the list of regions in transition at startup.
 */
// Walks every RegionState in regionsInTransition, reads its server, region and state, and
// dispatches on the state with a switch.
// NOTE(review): this excerpt is not compilable as shown -- the switch cases have no visible
// bodies, closing braces are missing, and two log-message literals have lost their call
// prefix. Some of the original comments below are truncated fragments. Confirm against the
// upstream AssignmentManager source.
void processRegionsInTransition(Collection<RegionState> regionsInTransition) {
    // to the region if the master dies right after the RPC call is out.
    // NOTE(review): the literal after '{' on the next line has lost its call prefix
    // (presumably a logging call).
    for (RegionState regionState : regionsInTransition) {"Processing " + regionState);
        ServerName serverName = regionState.getServerName();
        // case, try assigning it here.
        // A non-null server that is absent from the online-servers map is treated as offline.
        if (serverName != null && !serverManager.getOnlineServers().containsKey(serverName)) {
            // NOTE(review): call prefix of the following message was stripped.
  "Server " + serverName + " isn't online. SSH will handle this");
            // SSH will handle it
        HRegionInfo regionInfo = regionState.getRegion();
        RegionState.State state = regionState.getState();
        // NOTE(review): the per-case handling is not visible in this excerpt.
        switch(state) {
            case CLOSED:
            case PENDING_OPEN:
            case PENDING_CLOSE:
            case FAILED_CLOSE:
            case FAILED_OPEN:
                // No process for other states
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName) State(org.apache.hadoop.hbase.master.RegionState.State)

Example 25 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class AssignmentManager method cleanOutCrashedServerReferences.

/**
 * Clean out crashed server removing any assignments.
 * @param sn Server that went down.
 * @return list of regions in transition on this server
 */
// Drops references to the crashed server sn: first scans regionPlans (under its monitor) for
// plans whose destination is sn, then asks regionStates.serverOffline(sn) for the affected
// regions and, per region and under that region's lock, inspects its transition state.
// Returns the list obtained from regionStates.serverOffline(sn).
// NOTE(review): this excerpt is not compilable as shown -- the right-hand sides of two
// assignments, the removal statements, the finally body and several closing braces were
// lost during extraction. Confirm against the upstream AssignmentManager source.
public List<HRegionInfo> cleanOutCrashedServerReferences(final ServerName sn) {
    // Clean out any existing assignment plans for this server
    synchronized (this.regionPlans) {
        for (Iterator<Map.Entry<String, RegionPlan>> i = this.regionPlans.entrySet().iterator(); i.hasNext(); ) {
            // NOTE(review): right-hand side missing; presumably the iterator's next element.
            Map.Entry<String, RegionPlan> e =;
            ServerName otherSn = e.getValue().getDestination();
            // The name will be null if the region is planned for a random assign.
            if (otherSn != null && otherSn.equals(sn)) {
                // Use iterator's remove else we'll get CME
    List<HRegionInfo> rits = regionStates.serverOffline(sn);
    for (Iterator<HRegionInfo> it = rits.iterator(); it.hasNext(); ) {
        // NOTE(review): right-hand side missing; presumably the iterator's next element.
        HRegionInfo hri =;
        String encodedName = hri.getEncodedName();
        // We need a lock on the region as we could update it
        Lock lock = locker.acquireLock(encodedName);
        try {
            RegionState regionState = regionStates.getRegionTransitionState(encodedName);
            // Skip when there is no transition state, when the region is tied to a different
            // server, or when the state is none of PENDING_OPEN, OPENING, FAILED_OPEN,
            // FAILED_CLOSE, OFFLINE.
            if (regionState == null || (regionState.getServerName() != null && !regionState.isOnServer(sn)) || !RegionStates.isOneOfStates(regionState, State.PENDING_OPEN, State.OPENING, State.FAILED_OPEN, State.FAILED_CLOSE, State.OFFLINE)) {
                // NOTE(review): call prefix of the following message was stripped.
      "Skip " + regionState + " since it is not opening/failed_close" + " on the dead server any more: " + sn);
            } else {
                // NOTE(review): the handling for disabled/disabling tables is not visible here.
                if (tableStateManager.isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING)) {
                // Mark the region offline and assign it again by SSH
                regionStates.updateRegionState(hri, State.OFFLINE);
        // NOTE(review): finally body missing; presumably the region lock is released here.
        } finally {
    return rits;
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) NavigableMap(java.util.NavigableMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) ReentrantLock(java.util.concurrent.locks.ReentrantLock) Lock(java.util.concurrent.locks.Lock)


ServerName (org.apache.hadoop.hbase.ServerName)426 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)202 Test (org.junit.Test)163 ArrayList (java.util.ArrayList)97 TableName (org.apache.hadoop.hbase.TableName)89 IOException (java.io.IOException) HashMap (java.util.HashMap)81 List (java.util.List)72 Map (java.util.Map)54 HRegionLocation (org.apache.hadoop.hbase.HRegionLocation)45 HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor)34 Table (org.apache.hadoop.hbase.client.Table)33 HashSet (java.util.HashSet)32 TreeMap (java.util.TreeMap)31 HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor)29 Configuration (org.apache.hadoop.conf.Configuration)26 HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer)26 Pair (org.apache.hadoop.hbase.util.Pair)24 KeeperException (org.apache.zookeeper.KeeperException)23 InterruptedIOException (java.io.InterruptedIOException)