Search in sources :

Example 16 with HRegionInfo

use of org.apache.hadoop.hbase.HRegionInfo in project hbase by apache.

the class MasterMetaBootstrap method assignMeta.

/**
 * Makes sure the <code>hbase:meta</code> region of the given replica is assigned.
 * <p>
 * The state ZooKeeper reports for meta is registered with the region states first. If meta is
 * not opened, or its location cannot be verified within
 * <code>hbase.catalog.verification.timeout</code>, the region is either processed as a region
 * in transition (its last server is still online) or assigned again (its last server is not).
 *
 * @param previouslyFailedMetaRSs servers whose meta log still has to be handled; the last server
 *   of the default replica is added to it when that server is found offline
 * @param replicaId replica of <code>hbase:meta</code> to check
 * @throws InterruptedException propagated from the calls made here
 * @throws IOException propagated from the calls made here
 * @throws KeeperException propagated from the calls made here
 */
protected void assignMeta(Set<ServerName> previouslyFailedMetaRSs, int replicaId) throws InterruptedException, IOException, KeeperException {
    final AssignmentManager am = master.getAssignmentManager();
    final boolean defaultReplica = (replicaId == HRegionInfo.DEFAULT_REPLICA_ID);
    // Stays 0 when meta only had to be verified; bumped when it had to be (re)processed.
    int assigned = 0;
    final long verifyTimeout = master.getConfiguration().getLong("hbase.catalog.verification.timeout", 1000);
    status.setStatus(defaultReplica
        ? "Assigning hbase:meta region"
        : ("Assigning hbase:meta region, replicaId " + replicaId));

    // Register what zk currently reports for meta with the in-memory region states.
    final RegionState zkMetaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper(), replicaId);
    final HRegionInfo metaRegion = RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId);
    final RegionStates states = am.getRegionStates();
    states.createRegionState(metaRegion, zkMetaState.getState(), zkMetaState.getServerName(), null);

    // The location is only verified when zk already says meta is opened.
    final boolean openAndVerified = zkMetaState.isOpened()
        && master.getMetaTableLocator().verifyMetaRegionLocation(master.getClusterConnection(), master.getZooKeeper(), verifyTimeout, replicaId);
    if (!openAndVerified) {
        final ServerName lastMetaServer = zkMetaState.getServerName();
        if (!master.getServerManager().isServerOnline(lastMetaServer)) {
            // Only the default replica gets its meta log split and its old server remembered.
            if (lastMetaServer != null && defaultReplica) {
                splitMetaLogBeforeAssignment(lastMetaServer);
                states.logSplit(HRegionInfo.FIRST_META_REGIONINFO);
                previouslyFailedMetaRSs.add(lastMetaServer);
            }
            LOG.info("Re-assigning hbase:meta with replicaId, " + replicaId + " it was on " + lastMetaServer);
            am.assignMeta(metaRegion);
        } else {
            LOG.info(defaultReplica
                ? ("Meta was in transition on " + lastMetaServer)
                : ("Meta with replicaId " + replicaId + " was in transition on " + lastMetaServer));
            am.processRegionsInTransition(Collections.singletonList(zkMetaState));
        }
        assigned++;
    }

    if (defaultReplica) {
        // TODO: should we prevent from using state manager before meta was initialized?
        // tableStateManager.start();
        master.getTableStateManager().setTableState(TableName.META_TABLE_NAME, TableState.State.ENABLED);
    }

    final boolean logReplayMode = (RecoveryMode.LOG_REPLAY == master.getMasterWalManager().getLogRecoveryMode());
    if (logReplayMode && !previouslyFailedMetaRSs.isEmpty()) {
        // In WAL replay mode the meta log is split only now, after the assignment step above.
        status.setStatus("replaying log for Meta Region");
        master.getMasterWalManager().splitMetaLog(previouslyFailedMetaRSs);
    }

    am.setEnabledTable(TableName.META_TABLE_NAME);
    master.getTableStateManager().start();

    // assigned == 0 means meta was just verified, so there is nothing to wait for.
    if (defaultReplica) {
        enableCrashedServerProcessing(assigned != 0);
    }
    LOG.info("hbase:meta with replicaId " + replicaId + " assigned=" + assigned + ", location=" + master.getMetaTableLocator().getMetaRegionLocation(master.getZooKeeper(), replicaId));
    status.setStatus("META assigned.");
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName)

Example 17 with HRegionInfo

use of org.apache.hadoop.hbase.HRegionInfo in project hbase by apache.

the class AssignmentManager method onRegionMergeReverted.

/**
 * Handles a report that the merge producing <code>hri</code> was reverted on
 * <code>serverName</code>: the two regions named in the transition are put back online and the
 * merged region, if known, is taken offline.
 *
 * @param current state currently recorded for <code>hri</code>, may be null
 * @param hri the region that was being created by the merge
 * @param serverName server reporting the transition
 * @param transition reported transition; region infos at index 1 and 2 are the regions being merged
 * @return null when the report was applied or is treated as a retry, otherwise a message
 *   naming the precondition that did not hold
 */
private String onRegionMergeReverted(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) {
    // A region already in offline state could be an RPC retry, so that state is accepted too.
    final boolean mergingHere = current != null && current.isMergingNewOrOfflineOnServer(serverName);
    if (!mergingHere) {
        return hri.getShortNameToLog() + " is not merging on " + serverName;
    }
    if (current.isOffline()) {
        // Retry of a report that was already handled: nothing left to do.
        return null;
    }

    final HRegionInfo regionA = HRegionInfo.convert(transition.getRegionInfo(1));
    final HRegionInfo regionB = HRegionInfo.convert(transition.getRegionInfo(2));
    final RegionState stateA = regionStates.getRegionState(regionA);
    final RegionState stateB = regionStates.getRegionState(regionB);
    final boolean bothMerging = stateA != null && stateA.isMergingOnServer(serverName)
        && stateB != null && stateB.isMergingOnServer(serverName);
    if (!bothMerging) {
        return "Some daughter is not known to be merging on " + serverName + ", a=" + stateA + ", b=" + stateB;
    }

    // Bring both regions back online unconditionally; doing so when they are
    // not offline is harmless.
    regionOnline(regionA, serverName);
    regionOnline(regionB, serverName);

    // The merged region is only taken offline when a state is recorded for it.
    if (regionStates.getRegionState(hri) != null) {
        regionOffline(hri);
    }

    final boolean tableDisabledOrDisabling = getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING);
    if (tableDisabledOrDisabling) {
        invokeUnAssign(regionA);
        invokeUnAssign(regionB);
    }
    return null;
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo)

Example 18 with HRegionInfo

use of org.apache.hadoop.hbase.HRegionInfo in project hbase by apache.

the class AssignmentManager method assign.

/**
 * Bulk assign regions to <code>destination</code>.
 * <p>
 * Each region is first forced to offline state under its per-region lock. Regions for which
 * that fails, and regions whose open request does not come back as <code>OPENED</code>, are
 * collected and handed to <code>invokeAssign</code> individually afterwards (unless they are
 * online by then). Regions for which <code>isDisabledorDisablingRegionInRIT</code> is true, or
 * whose last host is dead but not processed yet, are dropped from the bulk request.
 *
 * @param destination server the open request is sent to
 * @param regions Regions to assign.
 * @return <code>true</code> if there was nothing to assign or the bulk attempt ran to the end
 *   (even when some user regions are still in transition afterwards); <code>false</code> if
 *   <code>server.isStopped()</code>, the destination reports it was shut down, or talking to
 *   the destination failed with an IOException that is not retried
 * @throws InterruptedException if interrupted while sleeping between retries, or propagated
 *   from the calls made here
 */
boolean assign(final ServerName destination, final List<HRegionInfo> regions) throws InterruptedException {
    long startTime = EnvironmentEdgeManager.currentTime();
    try {
        int regionCount = regions.size();
        if (regionCount == 0) {
            return true;
        }
        LOG.info("Assigning " + regionCount + " region(s) to " + destination.toString());
        Set<String> encodedNames = new HashSet<>(regionCount);
        for (HRegionInfo region : regions) {
            encodedNames.add(region.getEncodedName());
        }
        List<HRegionInfo> failedToOpenRegions = new ArrayList<>();
        Map<String, Lock> locks = locker.acquireLocks(encodedNames);
        try {
            Map<String, RegionPlan> plans = new HashMap<>(regionCount);
            List<RegionState> states = new ArrayList<>(regionCount);
            for (HRegionInfo region : regions) {
                String encodedName = region.getEncodedName();
                if (!isDisabledorDisablingRegionInRIT(region)) {
                    RegionState state = forceRegionStateToOffline(region, false);
                    boolean onDeadServer = false;
                    if (state != null) {
                        if (regionStates.wasRegionOnDeadServer(encodedName)) {
                            LOG.info("Skip assigning " + region.getRegionNameAsString() + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName) + " is dead but not processed yet");
                            onDeadServer = true;
                        } else {
                            // Region is part of the bulk request; its lock stays held until the
                            // outer finally block.
                            RegionPlan plan = new RegionPlan(region, state.getServerName(), destination);
                            plans.put(encodedName, plan);
                            states.add(state);
                            continue;
                        }
                    }
                    // Reassign if the region wasn't on a dead server
                    if (!onDeadServer) {
                        LOG.info("failed to force region state to offline, " + "will reassign later: " + region);
                        // assign individually later
                        failedToOpenRegions.add(region);
                    }
                }
                // Release the lock, this region is excluded from bulk assign because
                // we can't update its state, or set its znode to offline.
                // encodedNames is a set, so only one lock exists per encoded name: if the same
                // region shows up twice in 'regions' the lock is already gone the second time
                // round and must not be dereferenced.
                Lock lock = locks.remove(encodedName);
                if (lock != null) {
                    lock.unlock();
                }
            }
            if (server.isStopped()) {
                return false;
            }
            // Add region plans, so we can updateTimers when one region is opened so
            // that unnecessary timeout on RIT is reduced.
            this.addPlans(plans);
            List<Pair<HRegionInfo, List<ServerName>>> regionOpenInfos = new ArrayList<>(states.size());
            for (RegionState state : states) {
                HRegionInfo region = state.getRegion();
                regionStates.updateRegionState(region, State.PENDING_OPEN, destination);
                List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
                if (shouldAssignFavoredNodes(region)) {
                    favoredNodes = server.getFavoredNodesManager().getFavoredNodesWithDNPort(region);
                }
                regionOpenInfos.add(new Pair<>(region, favoredNodes));
            }
            // Move on to open regions.
            try {
                // Send OPEN RPC. If it fails on a IOE or RemoteException,
                // regions will be assigned individually.
                Configuration conf = server.getConfiguration();
                // Deadline for tolerating a destination that reports it is not running yet.
                long maxWaitTime = System.currentTimeMillis() + conf.getLong("hbase.regionserver.rpc.startup.waittime", 60000);
                for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
                    try {
                        List<RegionOpeningState> regionOpeningStateList = serverManager.sendRegionOpen(destination, regionOpenInfos);
                        // The response list is read positionally against regionOpenInfos.
                        for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
                            RegionOpeningState openingState = regionOpeningStateList.get(k);
                            if (openingState != RegionOpeningState.OPENED) {
                                HRegionInfo region = regionOpenInfos.get(k).getFirst();
                                LOG.info("Got opening state " + openingState + ", will reassign later: " + region);
                                // Failed opening this region, reassign it later
                                forceRegionStateToOffline(region, true);
                                failedToOpenRegions.add(region);
                            }
                        }
                        break;
                    } catch (IOException e) {
                        if (e instanceof RemoteException) {
                            e = ((RemoteException) e).unwrapRemoteException();
                        }
                        if (e instanceof RegionServerStoppedException) {
                            LOG.warn("The region server was shut down, ", e);
                            // No need to retry, the region server is a goner.
                            return false;
                        } else if (e instanceof ServerNotRunningYetException) {
                            long now = System.currentTimeMillis();
                            if (now < maxWaitTime) {
                                if (LOG.isDebugEnabled()) {
                                    LOG.debug("Server is not yet up; waiting up to " + (maxWaitTime - now) + "ms", e);
                                }
                                Thread.sleep(100);
                                // reset the try count
                                i--;
                                continue;
                            }
                            // Past the deadline: fall through and rethrow below.
                        } else if (e instanceof java.net.SocketTimeoutException && this.serverManager.isServerOnline(destination)) {
                            // The request timed out but the destination is still online, so the
                            // regions might already be opened on it: retry to
                            // open the region on the same server.
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("Bulk assigner openRegion() to " + destination + " has timed out, but the regions might" + " already be opened on it.", e);
                            }
                            // wait and reset the re-try count, server might be just busy.
                            Thread.sleep(100);
                            i--;
                            continue;
                        } else if (e instanceof FailedServerException && i < maximumAttempts) {
                            // In case the server is in the failed server list, no point to
                            // retry too soon. Retry after the failed_server_expiry time
                            long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
                            if (LOG.isDebugEnabled()) {
                                LOG.debug(destination + " is on failed server list; waiting " + sleepTime + "ms", e);
                            }
                            Thread.sleep(sleepTime);
                            continue;
                        }
                        throw e;
                    }
                }
            } catch (IOException e) {
                // Can be a socket timeout, EOF, NoRouteToHost, etc
                LOG.info("Unable to communicate with " + destination + " in order to assign regions, ", e);
                // Put every region of the bulk request back to offline before giving up.
                for (RegionState state : states) {
                    HRegionInfo region = state.getRegion();
                    forceRegionStateToOffline(region, true);
                }
                return false;
            }
        } finally {
            // Only the locks of regions that stayed in the bulk request are still in the map.
            for (Lock lock : locks.values()) {
                lock.unlock();
            }
        }
        if (!failedToOpenRegions.isEmpty()) {
            for (HRegionInfo region : failedToOpenRegions) {
                if (!regionStates.isRegionOnline(region)) {
                    invokeAssign(region);
                }
            }
        }
        // wait for assignment completion
        ArrayList<HRegionInfo> userRegionSet = new ArrayList<>(regions.size());
        for (HRegionInfo region : regions) {
            if (!region.getTable().isSystemTable()) {
                userRegionSet.add(region);
            }
        }
        if (!waitForAssignment(userRegionSet, true, userRegionSet.size(), System.currentTimeMillis())) {
            LOG.debug("some user regions are still in transition: " + userRegionSet);
        }
        LOG.debug("Bulk assigning done for " + destination);
        return true;
    } finally {
        // Recorded on every exit path, including early returns and exceptions.
        metricsAssignmentManager.updateBulkAssignTime(EnvironmentEdgeManager.currentTime() - startTime);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) FailedServerException(org.apache.hadoop.hbase.ipc.FailedServerException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) RegionServerStoppedException(org.apache.hadoop.hbase.regionserver.RegionServerStoppedException) HashSet(java.util.HashSet) Pair(org.apache.hadoop.hbase.util.Pair) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) ReentrantLock(java.util.concurrent.locks.ReentrantLock) Lock(java.util.concurrent.locks.Lock) ServerName(org.apache.hadoop.hbase.ServerName) RegionOpeningState(org.apache.hadoop.hbase.regionserver.RegionOpeningState) RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 19 with HRegionInfo

use of org.apache.hadoop.hbase.HRegionInfo in project hbase by apache.

the class AssignmentManager method processRegionsInTransition.

/**
 * Processes list of regions in transition at startup.
 * <p>
 * Each entry is dispatched on its recorded state; entries whose server is set but not among
 * the online servers are skipped.
 *
 * @param regionsInTransition region states to process
 */
void processRegionsInTransition(Collection<RegionState> regionsInTransition) {
    for (RegionState rit : regionsInTransition) {
        LOG.info("Processing " + rit);
        final ServerName host = rit.getServerName();
        // A region with no server recorded is still processed below; one whose server
        // is known but not online is left alone here.
        final boolean hostNotOnline = host != null && !serverManager.getOnlineServers().containsKey(host);
        if (hostNotOnline) {
            LOG.info("Server " + host + " isn't online. SSH will handle this");
            continue;
        }
        final HRegionInfo region = rit.getRegion();
        switch (rit.getState()) {
            case CLOSED:
                invokeAssign(region);
                break;
            case PENDING_OPEN:
                retrySendRegionOpen(rit);
                break;
            case PENDING_CLOSE:
                retrySendRegionClose(rit);
                break;
            case FAILED_CLOSE:
            case FAILED_OPEN:
                invokeUnAssign(region);
                break;
            default:
                // Every other state needs no handling here.
                break;
        }
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName) State(org.apache.hadoop.hbase.master.RegionState.State)

Example 20 with HRegionInfo

use of org.apache.hadoop.hbase.HRegionInfo in project hbase by apache.

the class AssignmentManager method onRegionMerged.

/**
 * Handles a report that the merge producing <code>hri</code> completed on
 * <code>serverName</code>: the two merged regions go offline in <code>MERGED</code> state and
 * the new region is put online.
 *
 * @param current state currently recorded for <code>hri</code>, may be null
 * @param hri the region created by the merge
 * @param serverName server reporting the transition
 * @param transition reported transition; region infos at index 1 and 2 are the regions that were merged
 * @return null when the report was applied or is treated as a retry, otherwise a message
 *   naming the precondition that did not hold or the stringified favored-nodes failure
 */
private String onRegionMerged(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) {
    // An already opened region is accepted as well; that case is handled as a retry below.
    final boolean mergingHere = current != null && current.isMergingNewOrOpenedOnServer(serverName);
    if (!mergingHere) {
        return hri.getShortNameToLog() + " is not merging on " + serverName;
    }
    if (current.isOpened()) {
        // Retry of a report that was already handled: nothing left to do.
        return null;
    }

    final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1));
    final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2));
    final RegionState stateA = regionStates.getRegionState(a);
    final RegionState stateB = regionStates.getRegionState(b);
    final boolean bothMerging = stateA != null && stateA.isMergingOnServer(serverName)
        && stateB != null && stateB.isMergingOnServer(serverName);
    if (!bothMerging) {
        return "Some daughter is not known to be merging on " + serverName + ", a=" + stateA + ", b=" + stateB;
    }

    regionOffline(a, State.MERGED);
    regionOffline(b, State.MERGED);
    regionOnline(hri, serverName, 1);

    try {
        processFavoredNodesForMerge(hri, a, b);
    } catch (IOException e) {
        LOG.error("Error while processing favored nodes after merge.", e);
        return StringUtils.stringifyException(e);
    }

    // User could disable the table before master knows the new region.
    final boolean tableDisabledOrDisabling = getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING);
    if (tableDisabledOrDisabling) {
        invokeUnAssign(hri);
        return null;
    }

    // Replica merging is handed to the executor rather than run inline.
    threadPoolExecutorService.submit(new Callable<Object>() {

        @Override
        public Object call() {
            doMergingOfReplicas(hri, a, b);
            return null;
        }
    });
    return null;
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) IOException(java.io.IOException) Callable(java.util.concurrent.Callable)

Aggregations

HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)408 ServerName (org.apache.hadoop.hbase.ServerName)153 Test (org.junit.Test)141 TableName (org.apache.hadoop.hbase.TableName)118 ArrayList (java.util.ArrayList)86 IOException (java.io.IOException)83 HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor)75 Path (org.apache.hadoop.fs.Path)63 List (java.util.List)59 HashMap (java.util.HashMap)57 HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor)49 Table (org.apache.hadoop.hbase.client.Table)47 Map (java.util.Map)43 HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer)41 FileSystem (org.apache.hadoop.fs.FileSystem)40 Configuration (org.apache.hadoop.conf.Configuration)38 HRegionLocation (org.apache.hadoop.hbase.HRegionLocation)35 TreeMap (java.util.TreeMap)26 HashSet (java.util.HashSet)23 HBaseIOException (org.apache.hadoop.hbase.HBaseIOException)22