Search in sources :

Example 1 with RegionStateNode

use of org.apache.hadoop.hbase.master.assignment.RegionStateNode in project hbase by apache.

the class MasterRpcServices method unassignRegion.

/**
 * Handles an unassign-region RPC: resolves the region named in the request and asks the
 * assignment manager to unassign it, invoking the master coprocessor pre/post hooks around
 * the call when a coprocessor host is present.
 *
 * @param controller RPC controller; not read by this method
 * @param req request carrying the region specifier to unassign
 * @return an {@link UnassignRegionResponse} built with no fields set
 * @throws ServiceException wrapping any {@link IOException} raised while handling the request
 */
@Override
public UnassignRegionResponse unassignRegion(RpcController controller, UnassignRegionRequest req) throws ServiceException {
    try {
        final byte[] requestedName = req.getRegion().getValue().toByteArray();
        final RegionSpecifierType specifierType = req.getRegion().getType();
        final UnassignRegionResponse response = UnassignRegionResponse.newBuilder().build();
        server.checkInitialized();
        // A specifier type other than REGION_NAME is only warned about; the bytes are still
        // looked up as a region name below.
        if (specifierType != RegionSpecifierType.REGION_NAME) {
            LOG.warn("unassignRegion specifier type: expected: " + RegionSpecifierType.REGION_NAME + " actual: " + specifierType);
        }
        final RegionStateNode stateNode = server.getAssignmentManager().getRegionStates().getRegionStateNodeFromName(requestedName);
        if (stateNode == null) {
            // No state node is known for the requested name.
            throw new UnknownRegionException(Bytes.toString(requestedName));
        }
        final RegionInfo regionInfo = stateNode.getRegionInfo();
        if (server.cpHost != null) {
            server.cpHost.preUnassign(regionInfo);
        }
        final String auditLine = server.getClientIdAuditPrefix() + " unassign " + regionInfo.getRegionNameAsString() + " in current location if it is online";
        LOG.debug(auditLine);
        server.getAssignmentManager().unassign(regionInfo);
        if (server.cpHost != null) {
            server.cpHost.postUnassign(regionInfo);
        }
        return response;
    } catch (IOException cause) {
        // The RPC layer expects ServiceException; keep the original failure as the cause.
        throw new ServiceException(cause);
    }
}
Also used : UnassignRegionResponse(org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.UnassignRegionResponse) ServiceException(org.apache.hbase.thirdparty.com.google.protobuf.ServiceException) RegionSpecifierType(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType) UnknownRegionException(org.apache.hadoop.hbase.UnknownRegionException) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) RegionStateNode(org.apache.hadoop.hbase.master.assignment.RegionStateNode)

Example 2 with RegionStateNode

use of org.apache.hadoop.hbase.master.assignment.RegionStateNode in project hbase by apache.

the class HMaster method balance.

/**
 * Attempts one balancer run and reports what happened.
 * <p/>
 * The run is skipped, returning a response on which nothing has been set, when the balancer
 * tracker is missing, the balancer is off and this is not a dry run, region management actions
 * are being skipped, regions are in transition (unless the request asks to ignore them and
 * hbase:meta is not among them), dead servers are still being processed, or the
 * {@code preBalance} coprocessor hook bypasses the request or fails.
 * <p/>
 * Otherwise region plans are computed, {@code balancerRan} and {@code movesCalculated} are set,
 * and, unless this is a dry run, the plans are executed with throttling. The
 * {@code postBalance} hook is invoked in both cases and {@code movesExecuted} is set from the
 * plans that were executed.
 *
 * @param request balance options (dry run, ignore regions in transition)
 * @return the response describing whether the balancer ran and how many moves were
 *         calculated and executed
 * @throws IOException if the master is not initialized or a step outside the guarded
 *         coprocessor calls fails
 */
public BalanceResponse balance(BalanceRequest request) throws IOException {
    checkInitialized();
    BalanceResponse.Builder responseBuilder = BalanceResponse.newBuilder();
    if (loadBalancerTracker == null || !(loadBalancerTracker.isBalancerOn() || request.isDryRun())) {
        return responseBuilder.build();
    }
    if (skipRegionManagementAction("balancer")) {
        return responseBuilder.build();
    }
    synchronized (this.balancer) {
        // Only allow one balance run at a time.
        if (this.assignmentManager.hasRegionsInTransition()) {
            List<RegionStateNode> regionsInTransition = assignmentManager.getRegionsInTransition();
            // if hbase:meta region is in transition, result of assignment cannot be recorded
            // ignore the force flag in that case
            boolean metaInTransition = assignmentManager.isMetaRegionInTransition();
            boolean ignoreRegionsInTransition = request.isIgnoreRegionsInTransition();
            if (!ignoreRegionsInTransition || metaInTransition) {
                // Only the first few regions are printed to keep the log line bounded.
                int max = 5;
                boolean truncated = regionsInTransition.size() > max;
                List<RegionStateNode> toPrint = truncated ? regionsInTransition.subList(0, max) : regionsInTransition;
                // Log the real flag value: this branch is also reached with ignoreRIT=true
                // when hbase:meta is in transition.
                LOG.info("Not running balancer (ignoreRIT=" + ignoreRegionsInTransition + ", metaRIT=" + metaInTransition + ") because " + regionsInTransition.size() + " region(s) in transition: " + toPrint + (truncated ? "(truncated list)" : ""));
                return responseBuilder.build();
            }
        }
        if (this.serverManager.areDeadServersInProgress()) {
            LOG.info("Not running balancer because processing dead regionserver(s): " + this.serverManager.getDeadServers());
            return responseBuilder.build();
        }
        if (this.cpHost != null) {
            try {
                if (this.cpHost.preBalance(request)) {
                    LOG.debug("Coprocessor bypassing balancer request");
                    return responseBuilder.build();
                }
            } catch (IOException ioe) {
                // A failing pre-hook aborts this run instead of propagating to the caller.
                LOG.error("Error invoking master coprocessor preBalance()", ioe);
                return responseBuilder.build();
            }
        }
        Map<TableName, Map<ServerName, List<RegionInfo>>> assignments = this.assignmentManager.getRegionStates().getAssignmentsForBalancer(tableStateManager, this.serverManager.getOnlineServersList());
        // Draining servers are removed from the per-table server maps handed to the balancer.
        for (Map<ServerName, List<RegionInfo>> serverMap : assignments.values()) {
            serverMap.keySet().removeAll(this.serverManager.getDrainingServersList());
        }
        // Give the balancer the current cluster state.
        this.balancer.updateClusterMetrics(getClusterMetricsWithoutCoprocessor());
        List<RegionPlan> plans = this.balancer.balanceCluster(assignments);
        responseBuilder.setBalancerRan(true).setMovesCalculated(plans == null ? 0 : plans.size());
        if (skipRegionManagementAction("balancer")) {
            // make one last check that the cluster isn't shutting down before proceeding.
            return responseBuilder.build();
        }
        // For dry run we don't actually want to execute the moves, but we do want
        // to execute the coprocessor below
        List<RegionPlan> sucRPs = request.isDryRun() ? Collections.emptyList() : executeRegionPlansWithThrottling(plans);
        if (this.cpHost != null) {
            try {
                this.cpHost.postBalance(request, sucRPs);
            } catch (IOException ioe) {
                // balancing already succeeded so don't change the result
                LOG.error("Error invoking master coprocessor postBalance()", ioe);
            }
        }
        responseBuilder.setMovesExecuted(sucRPs.size());
    }
    // The run completed; return the populated response.
    return responseBuilder.build();
}
Also used : RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) InterruptedIOException(java.io.InterruptedIOException) RSGroupAdminEndpoint(org.apache.hadoop.hbase.rsgroup.RSGroupAdminEndpoint) RegionStateNode(org.apache.hadoop.hbase.master.assignment.RegionStateNode) TableName(org.apache.hadoop.hbase.TableName) BalanceResponse(org.apache.hadoop.hbase.client.BalanceResponse) ServerName(org.apache.hadoop.hbase.ServerName) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) HashMap(java.util.HashMap)

Example 3 with RegionStateNode

use of org.apache.hadoop.hbase.master.assignment.RegionStateNode in project hbase by apache.

the class MasterDumpServlet method dumpRIT.

/**
 * Prints one line per region currently in transition, or a single notice when the master
 * has no assignment manager.
 *
 * @param master master whose assignment manager is queried
 * @param out writer receiving the dump
 */
private void dumpRIT(HMaster master, PrintWriter out) {
    final AssignmentManager assignmentManager = master.getAssignmentManager();
    if (assignmentManager == null) {
        out.println("AssignmentManager is not initialized");
    } else {
        for (RegionStateNode node : assignmentManager.getRegionsInTransition()) {
            final String encodedName = node.getRegionInfo().getEncodedName();
            final String description = node.toDescriptiveString();
            out.println("Region " + encodedName + ": " + description);
        }
    }
}
Also used : AssignmentManager(org.apache.hadoop.hbase.master.assignment.AssignmentManager) RegionStateNode(org.apache.hadoop.hbase.master.assignment.RegionStateNode)

Example 4 with RegionStateNode

use of org.apache.hadoop.hbase.master.assignment.RegionStateNode in project hbase by apache.

the class ReopenTableRegionsProcedure method executeFromState.

/**
 * Drives the reopen-table-regions state machine.
 * <ul>
 * <li>GET_REGIONS: gives up if the table is not enabled, otherwise collects the region
 * locations to reopen and moves to REOPEN_REGIONS.</li>
 * <li>REOPEN_REGIONS: for every collected location whose region node exists and has no
 * procedure attached, attaches a reopen procedure under the node lock and adds it as a child;
 * then moves to CONFIRM_REOPENED.</li>
 * <li>CONFIRM_REOPENED: keeps only the locations that {@code checkReopened} still returns;
 * finishes when none remain, loops back to REOPEN_REGIONS when any of them can be scheduled,
 * and otherwise suspends with a backoff timeout.</li>
 * </ul>
 *
 * @param env master procedure environment
 * @param state state to execute
 * @return whether more states remain
 * @throws ProcedureSuspendedException when waiting on the backoff timeout before retrying
 * @throws UnsupportedOperationException for a state this method does not handle
 */
@Override
protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state) throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
    switch(state) {
        case REOPEN_TABLE_REGIONS_GET_REGIONS:
            if (isTableEnabled(env)) {
                List<HRegionLocation> candidateLocations = env.getAssignmentManager().getRegionStates().getRegionsOfTableForReopen(tableName);
                regions = getRegionLocationsForReopen(candidateLocations);
                setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
                return Flow.HAS_MORE_STATE;
            }
            LOG.info("Table {} is disabled, give up reopening its regions", tableName);
            return Flow.NO_MORE_STATE;
        case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
            for (HRegionLocation location : regions) {
                RegionStateNode node = env.getAssignmentManager().getRegionStates().getRegionStateNode(location.getRegion());
                // A missing node is possible: the region may already have been merged or
                // split, see HBASE-20921.
                if (node == null) {
                    continue;
                }
                TransitRegionStateProcedure reopenProc;
                node.lock();
                try {
                    // Skip regions that already have a procedure attached.
                    if (node.getProcedure() != null) {
                        continue;
                    }
                    reopenProc = TransitRegionStateProcedure.reopen(env, node.getRegionInfo());
                    node.setProcedure(reopenProc);
                } finally {
                    node.unlock();
                }
                // The child is added only after the node lock has been released.
                addChildProcedure(reopenProc);
            }
            setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_CONFIRM_REOPENED);
            return Flow.HAS_MORE_STATE;
        case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED:
            // checkReopened returns null for locations that need no further work.
            regions = regions.stream().map(env.getAssignmentManager().getRegionStates()::checkReopened).filter(pending -> pending != null).collect(Collectors.toList());
            if (regions.isEmpty()) {
                return Flow.NO_MORE_STATE;
            }
            boolean anySchedulable = regions.stream().anyMatch(pending -> canSchedule(env, pending));
            if (anySchedulable) {
                retryCounter = null;
                setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
                return Flow.HAS_MORE_STATE;
            }
            // None of the remaining regions can be scheduled right now: back off, then try
            // again.
            if (retryCounter == null) {
                retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
            }
            long backoffMillis = retryCounter.getBackoffTimeAndIncrementAttempts();
            LOG.info("There are still {} region(s) which need to be reopened for table {} are in " + "OPENING state, suspend {}secs and try again later", regions.size(), tableName, backoffMillis / 1000);
            setTimeout(Math.toIntExact(backoffMillis));
            setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
            skipPersistence();
            throw new ProcedureSuspendedException();
        default:
            throw new UnsupportedOperationException("unhandled state=" + state);
    }
}
Also used : HRegionLocation(org.apache.hadoop.hbase.HRegionLocation) TransitRegionStateProcedure(org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure) ProcedureSuspendedException(org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException) RegionStateNode(org.apache.hadoop.hbase.master.assignment.RegionStateNode)

Example 5 with RegionStateNode

use of org.apache.hadoop.hbase.master.assignment.RegionStateNode in project hbase by apache.

the class ServerCrashProcedure method assignRegions.

/**
 * Schedules reassignment for the regions that were on the crashed region server.
 * <p/>
 * Each region's RegionStateNode is handled under its lock, and exactly one of the following
 * applies:
 * <ul>
 * <li>its location no longer matches this procedure's server: the region is skipped (or the
 * call fails if the AssignmentManager has stopped);</li>
 * <li>it already has a procedure attached: that procedure is told the server crashed;</li>
 * <li>its table is DISABLING: the region is marked abnormally closed;</li>
 * <li>its table is DISABLED: a warning is logged and the region is skipped;</li>
 * <li>otherwise: a new assign procedure is attached to the node and added as a child.</li>
 * </ul>
 *
 * @param env master procedure environment
 * @param regions regions to consider for assignment
 * @throws IOException in particular a {@link DoNotRetryIOException} when a region's location
 *         does not match and the AssignmentManager is no longer running
 */
private void assignRegions(MasterProcedureEnv env, List<RegionInfo> regions) throws IOException {
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    boolean retainAssignment = env.getMasterConfiguration().getBoolean(MASTER_SCP_RETAIN_ASSIGNMENT, DEFAULT_MASTER_SCP_RETAIN_ASSIGNMENT);
    for (RegionInfo region : regions) {
        RegionStateNode node = am.getRegionStates().getOrCreateRegionStateNode(region);
        node.lock();
        try {
            if (!isMatchingRegionLocation(node)) {
                // The node's location is not the expected one, so the region is not assigned
                // here. Double check that the AssignmentManager is still running so that an
                // assignment is not skipped incorrectly.
                if (!am.isRunning()) {
                    throw new DoNotRetryIOException("AssignmentManager has been stopped, can not process assignment any more");
                }
                LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...", this, node, serverName);
            } else if (node.getProcedure() != null) {
                // A procedure is already attached to this region; notify it of the crash
                // rather than scheduling another one.
                LOG.info("{} found RIT {}; {}", this, node.getProcedure(), node);
                node.getProcedure().serverCrashed(env, node, getServerName(), !retainAssignment);
            } else if (env.getMasterServices().getTableStateManager().isTableState(node.getTable(), TableState.State.DISABLING)) {
                // The state must be changed here; otherwise the TRSP scheduled by DTP would
                // try to close the region from a dead server and never succeed. See
                // HBASE-23636 for more details.
                env.getAssignmentManager().regionClosedAbnormally(node);
                LOG.info("{} found table disabling for region {}, set it state to ABNORMALLY_CLOSED.", this, node);
            } else if (env.getMasterServices().getTableStateManager().isTableState(node.getTable(), TableState.State.DISABLED)) {
                // Not expected: the table is disabled yet still has regions on the server.
                LOG.warn("Found table disabled for region {}, procDetails: {}", node, this);
            } else {
                TransitRegionStateProcedure assignProc = TransitRegionStateProcedure.assign(env, region, !retainAssignment, null);
                node.setProcedure(assignProc);
                addChildProcedure(assignProc);
            }
        } finally {
            node.unlock();
        }
    }
}
Also used : TransitRegionStateProcedure(org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) AssignmentManager(org.apache.hadoop.hbase.master.assignment.AssignmentManager) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) RegionStateNode(org.apache.hadoop.hbase.master.assignment.RegionStateNode)

Aggregations

RegionStateNode (org.apache.hadoop.hbase.master.assignment.RegionStateNode)18 RegionInfo (org.apache.hadoop.hbase.client.RegionInfo)11 Test (org.junit.Test)9 IOException (java.io.IOException)7 ServerName (org.apache.hadoop.hbase.ServerName)7 TableName (org.apache.hadoop.hbase.TableName)6 AssignmentManager (org.apache.hadoop.hbase.master.assignment.AssignmentManager)5 TransitRegionStateProcedure (org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure)5 List (java.util.List)4 DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException)3 Waiter (org.apache.hadoop.hbase.Waiter)3 ConstraintException (org.apache.hadoop.hbase.constraint.ConstraintException)3 InterruptedIOException (java.io.InterruptedIOException)2 ArrayList (java.util.ArrayList)2 Map (java.util.Map)2 Random (java.util.Random)2 UnknownRegionException (org.apache.hadoop.hbase.UnknownRegionException)2 TableDescriptor (org.apache.hadoop.hbase.client.TableDescriptor)2 FavoredNodeAssignmentHelper (org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper)2 FavoredNodesManager (org.apache.hadoop.hbase.favored.FavoredNodesManager)2