Search in sources :

Example 1 with ProcedureSuspendedException

use of org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException in project hbase by apache.

the class TransitRegionStateProcedure method executeFromState.

/**
 * Executes a single step of the region transition state machine.
 *
 * <p>Each state is handled by a dedicated helper ({@code queueAssign}, {@code openRegion},
 * {@code confirmOpened}, {@code closeRegion}, {@code confirmClosed}). An {@link IOException}
 * raised by any step does not fail the procedure: a timeout taken from the retry counter is
 * armed, the procedure is put into {@code WAITING_TIMEOUT} and a
 * {@link ProcedureSuspendedException} is thrown.
 *
 * @param env the master procedure environment
 * @param state the state to execute
 * @return the flow value telling the caller whether more states remain
 * @throws ProcedureSuspendedException after an {@link IOException}, once the timeout is armed
 * @throws UnsupportedOperationException if {@code state} is not one of the handled states
 */
@Override
protected Flow executeFromState(MasterProcedureEnv env, RegionStateTransitionState state) throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
    RegionStateNode node = getRegionStateNode(env);
    try {
        switch(state) {
            case REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE:
                // A non-default replica is only assigned when the master has a state node for its
                // default (primary) replica; otherwise log the problem and finish the procedure.
                if (!RegionReplicaUtil.isDefaultReplica(node.getRegionInfo())) {
                    RegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(node.getRegionInfo());
                    boolean primaryMissing = env.getMasterServices().getAssignmentManager().getRegionStates().getRegionStateNode(primaryInfo) == null;
                    if (primaryMissing) {
                        LOG.error("Cannot assign replica region {} because its primary region {} does not exist.", node.getRegionInfo(), primaryInfo);
                        // Detach this procedure from the node before giving up.
                        node.unsetProcedure(this);
                        return Flow.NO_MORE_STATE;
                    }
                }
                queueAssign(env, node);
                return Flow.HAS_MORE_STATE;
            case REGION_STATE_TRANSITION_OPEN:
                openRegion(env, node);
                return Flow.HAS_MORE_STATE;
            case REGION_STATE_TRANSITION_CONFIRM_OPENED:
                return confirmOpened(env, node);
            case REGION_STATE_TRANSITION_CLOSE:
                closeRegion(env, node);
                return Flow.HAS_MORE_STATE;
            case REGION_STATE_TRANSITION_CONFIRM_CLOSED:
                return confirmClosed(env, node);
            default:
                throw new UnsupportedOperationException("unhandled state=" + state);
        }
    } catch (IOException ioe) {
        // Lazily create the retry counter on the first failure.
        if (retryCounter == null) {
            retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
        }
        // The same value is used as the timeout below; it is divided by 1000 to log seconds.
        long backoffMillis = retryCounter.getBackoffTimeAndIncrementAttempts();
        LOG.warn("Failed transition, suspend {}secs {}; {}; waiting on rectified condition fixed " + "by other Procedure or operator intervention", backoffMillis / 1000, this, node.toShortString(), ioe);
        setTimeout(Math.toIntExact(backoffMillis));
        setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
        skipPersistence();
        throw new ProcedureSuspendedException();
    }
}
Also used : RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) ProcedureSuspendedException(org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException)

Example 2 with ProcedureSuspendedException

use of org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException in project hbase by apache.

the class RecoverStandbyProcedure method executeFromState.

/**
 * Executes a single step of the recover-standby state machine for {@code peerId}.
 *
 * <p>The states run in this order: rename the remote WAL directory to the peer replay
 * directory, register the peer, dispatch the WALs, unregister the peer, and finally rename to
 * the peer snapshot WAL directory. A failure of the first rename marks the procedure as failed;
 * a failure of the final rename is retried by throwing a {@link ProcedureYieldException}.
 *
 * @param env the master procedure environment
 * @param state the state to execute
 * @return the flow value telling the caller whether more states remain
 * @throws ProcedureYieldException if the final rename fails with an {@link IOException}
 * @throws UnsupportedOperationException if {@code state} is not one of the handled states
 */
@Override
protected Flow executeFromState(MasterProcedureEnv env, RecoverStandbyState state) throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
    SyncReplicationReplayWALManager replayWALManager = env.getMasterServices().getSyncReplicationReplayWALManager();
    switch(state) {
        case RENAME_SYNC_REPLICATION_WALS_DIR:
            try {
                replayWALManager.renameToPeerReplayWALDir(peerId);
            } catch (IOException renameFailure) {
                // Not retried: record the failure on the procedure and stop.
                LOG.warn("Failed to rename remote wal dir for peer id={}", peerId, renameFailure);
                setFailure("master-recover-standby", renameFailure);
                return Flow.NO_MORE_STATE;
            }
            setNextState(RecoverStandbyState.REGISTER_PEER_TO_WORKER_STORAGE);
            return Flow.HAS_MORE_STATE;

        case REGISTER_PEER_TO_WORKER_STORAGE:
            replayWALManager.registerPeer(peerId);
            setNextState(RecoverStandbyState.DISPATCH_WALS);
            return Flow.HAS_MORE_STATE;

        case DISPATCH_WALS:
            dispathWals(replayWALManager);
            setNextState(RecoverStandbyState.UNREGISTER_PEER_FROM_WORKER_STORAGE);
            return Flow.HAS_MORE_STATE;

        case UNREGISTER_PEER_FROM_WORKER_STORAGE:
            replayWALManager.unregisterPeer(peerId);
            setNextState(RecoverStandbyState.SNAPSHOT_SYNC_REPLICATION_WALS_DIR);
            return Flow.HAS_MORE_STATE;

        case SNAPSHOT_SYNC_REPLICATION_WALS_DIR:
            try {
                replayWALManager.renameToPeerSnapshotWALDir(peerId);
            } catch (IOException cleanupFailure) {
                // Unlike the first rename, this step is retried: yield so the state runs again.
                LOG.warn("Failed to cleanup replay wals dir for peer id={}, , retry", peerId, cleanupFailure);
                throw new ProcedureYieldException();
            }
            return Flow.NO_MORE_STATE;

        default:
            throw new UnsupportedOperationException("unhandled state=" + state);
    }
}
Also used : IOException(java.io.IOException) ProcedureYieldException(org.apache.hadoop.hbase.procedure2.ProcedureYieldException)

Example 3 with ProcedureSuspendedException

use of org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException in project hbase by apache.

the class ReopenTableRegionsProcedure method executeFromState.

/**
 * Executes a single step of the reopen-table-regions state machine for {@code tableName}.
 *
 * <p>The procedure collects the regions to reopen, schedules a reopen child procedure for each
 * region that has no procedure attached, and then loops between confirming and rescheduling
 * until {@code checkReopened} leaves no region in the list. When regions remain but none can be
 * scheduled, the procedure suspends itself with a backoff taken from the retry counter.
 *
 * @param env the master procedure environment
 * @param state the state to execute
 * @return the flow value telling the caller whether more states remain
 * @throws ProcedureSuspendedException when regions remain but none can be scheduled yet
 * @throws UnsupportedOperationException if {@code state} is not one of the handled states
 */
@Override
protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state) throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
    switch(state) {
        case REOPEN_TABLE_REGIONS_GET_REGIONS:
            if (!isTableEnabled(env)) {
                LOG.info("Table {} is disabled, give up reopening its regions", tableName);
                return Flow.NO_MORE_STATE;
            }
            List<HRegionLocation> candidates = env.getAssignmentManager().getRegionStates().getRegionsOfTableForReopen(tableName);
            regions = getRegionLocationsForReopen(candidates);
            setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
            return Flow.HAS_MORE_STATE;

        case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
            for (HRegionLocation location : regions) {
                RegionStateNode node = env.getAssignmentManager().getRegionStates().getRegionStateNode(location.getRegion());
                // A missing node is possible: the region may already have been merged or split,
                // see HBASE-20921.
                if (node == null) {
                    continue;
                }
                TransitRegionStateProcedure reopenProc;
                node.lock();
                try {
                    // Skip regions that already have a procedure attached.
                    if (node.getProcedure() != null) {
                        continue;
                    }
                    reopenProc = TransitRegionStateProcedure.reopen(env, node.getRegionInfo());
                    node.setProcedure(reopenProc);
                } finally {
                    node.unlock();
                }
                // The child is added only after the node lock has been released.
                addChildProcedure(reopenProc);
            }
            setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_CONFIRM_REOPENED);
            return Flow.HAS_MORE_STATE;

        case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED:
            // Keep only the locations for which checkReopened returns a non-null result.
            regions = regions.stream()
                .map(env.getAssignmentManager().getRegionStates()::checkReopened)
                .filter(remaining -> remaining != null)
                .collect(Collectors.toList());
            if (regions.isEmpty()) {
                return Flow.NO_MORE_STATE;
            }
            if (regions.stream().anyMatch(pending -> canSchedule(env, pending))) {
                // Progress is possible, so drop the retry counter and go schedule again.
                retryCounter = null;
                setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
                return Flow.HAS_MORE_STATE;
            }
            // None of the remaining regions can be scheduled right now: suspend with a backoff
            // and run this state again once the timeout fires.
            if (retryCounter == null) {
                retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
            }
            long backoffMillis = retryCounter.getBackoffTimeAndIncrementAttempts();
            LOG.info("There are still {} region(s) which need to be reopened for table {} are in " + "OPENING state, suspend {}secs and try again later", regions.size(), tableName, backoffMillis / 1000);
            setTimeout(Math.toIntExact(backoffMillis));
            setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
            skipPersistence();
            throw new ProcedureSuspendedException();

        default:
            throw new UnsupportedOperationException("unhandled state=" + state);
    }
}
Also used : HRegionLocation(org.apache.hadoop.hbase.HRegionLocation) TransitRegionStateProcedure(org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure) ProcedureSuspendedException(org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException) RegionStateNode(org.apache.hadoop.hbase.master.assignment.RegionStateNode)

Example 4 with ProcedureSuspendedException

use of org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException in project hbase by apache.

the class InitMetaProcedure method executeFromState.

/**
 * Executes a single step of the init-meta state machine.
 *
 * <p>The states run in this order: write the file system layout and update the table
 * descriptors, add the child procedures that assign the first meta region, then create the
 * default and system namespace directories and insert both namespaces into the meta table.
 * An {@link IOException} raised by any step does not fail the procedure: a timeout taken from
 * the retry counter is armed, the procedure is put into {@code WAITING_TIMEOUT} and a
 * {@link ProcedureSuspendedException} is thrown.
 *
 * @param env the master procedure environment
 * @param state the state to execute
 * @return the flow value telling the caller whether more states remain
 * @throws ProcedureSuspendedException after an {@link IOException}, once the timeout is armed
 * @throws UnsupportedOperationException if {@code state} is not one of the handled states
 */
@Override
protected Flow executeFromState(MasterProcedureEnv env, InitMetaState state) throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
    LOG.debug("Execute {}", this);
    try {
        switch(state) {
            case INIT_META_WRITE_FS_LAYOUT:
                Configuration conf = env.getMasterConfiguration();
                Path rootDir = CommonFSUtils.getRootDir(conf);
                TableDescriptor td = writeFsLayout(rootDir, conf);
                env.getMasterServices().getTableDescriptors().update(td, true);
                setNextState(InitMetaState.INIT_META_ASSIGN_META);
                return Flow.HAS_MORE_STATE;
            case INIT_META_ASSIGN_META:
                LOG.info("Going to assign meta");
                addChildProcedure(env.getAssignmentManager().createAssignProcedures(Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO)));
                setNextState(InitMetaState.INIT_META_CREATE_NAMESPACES);
                return Flow.HAS_MORE_STATE;
            case INIT_META_CREATE_NAMESPACES:
                LOG.info("Going to create {} and {} namespaces", DEFAULT_NAMESPACE, SYSTEM_NAMESPACE);
                createDirectory(env, DEFAULT_NAMESPACE);
                createDirectory(env, SYSTEM_NAMESPACE);
                // here the TableNamespaceManager has not been initialized yet, so we have to insert the
                // record directly into meta table, later the TableNamespaceManager will load these two
                // namespaces when starting.
                insertNamespaceToMeta(env.getMasterServices().getConnection(), DEFAULT_NAMESPACE);
                insertNamespaceToMeta(env.getMasterServices().getConnection(), SYSTEM_NAMESPACE);
                return Flow.NO_MORE_STATE;
            default:
                throw new UnsupportedOperationException("unhandled state=" + state);
        }
    } catch (IOException e) {
        // Lazily create the retry counter on the first failure.
        if (retryCounter == null) {
            retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
        }
        long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
        // The backoff is a millisecond value (it is passed unchanged to setTimeout below), while
        // the message reports seconds, so convert before logging.
        LOG.warn("Failed to init meta, suspend {}secs", backoff / 1000, e);
        setTimeout(Math.toIntExact(backoff));
        setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
        skipPersistence();
        throw new ProcedureSuspendedException();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) IOException(java.io.IOException) ProcedureSuspendedException(org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException) TableDescriptor(org.apache.hadoop.hbase.client.TableDescriptor)

Example 5 with ProcedureSuspendedException

use of org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException in project hbase by apache.

the class TestSplitWALManager method testAddNewServer.

/**
 * Checks that a procedure which could not get a split WAL worker gets one after a new region
 * server is started.
 *
 * <p>Three acquisitions are expected to succeed, the fourth is expected to throw
 * {@link ProcedureSuspendedException}, and the same fourth procedure is expected to succeed
 * once an additional region server is online.
 */
@Test
public void testAddNewServer() throws Exception {
    List<FakeServerProcedure> procedures = new ArrayList<>();
    for (int idx = 0; idx < 4; idx++) {
        procedures.add(new FakeServerProcedure(TEST_UTIL.getHBaseCluster().getServerHoldingMeta()));
    }

    // The first three procedures each get a worker.
    ServerName firstWorker = splitWALManager.acquireSplitWALWorker(procedures.get(0));
    Assert.assertNotNull(firstWorker);
    Assert.assertNotNull(splitWALManager.acquireSplitWALWorker(procedures.get(1)));
    Assert.assertNotNull(splitWALManager.acquireSplitWALWorker(procedures.get(2)));

    // The fourth acquisition must be rejected with a suspension.
    Exception caught = null;
    try {
        splitWALManager.acquireSplitWALWorker(procedures.get(3));
    } catch (ProcedureSuspendedException suspended) {
        caught = suspended;
    }
    Assert.assertNotNull(caught);
    Assert.assertTrue(caught instanceof ProcedureSuspendedException);

    // Once another region server is online, the previously rejected procedure gets a worker.
    JVMClusterUtil.RegionServerThread addedServer = TEST_UTIL.getHBaseCluster().startRegionServer();
    addedServer.waitForServerOnline();
    Assert.assertNotNull(splitWALManager.acquireSplitWALWorker(procedures.get(3)));
}
Also used : JVMClusterUtil(org.apache.hadoop.hbase.util.JVMClusterUtil) ServerName(org.apache.hadoop.hbase.ServerName) ArrayList(java.util.ArrayList) ProcedureSuspendedException(org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException) ProcedureSuspendedException(org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException) ProcedureYieldException(org.apache.hadoop.hbase.procedure2.ProcedureYieldException) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

ProcedureSuspendedException (org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException)11 IOException (java.io.IOException)9 ServerName (org.apache.hadoop.hbase.ServerName)5 ProcedureYieldException (org.apache.hadoop.hbase.procedure2.ProcedureYieldException)4 Test (org.junit.Test)3 ArrayList (java.util.ArrayList)2 Configuration (org.apache.hadoop.conf.Configuration)1 Path (org.apache.hadoop.fs.Path)1 DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException)1 HBaseIOException (org.apache.hadoop.hbase.HBaseIOException)1 HRegionLocation (org.apache.hadoop.hbase.HRegionLocation)1 RegionInfo (org.apache.hadoop.hbase.client.RegionInfo)1 TableDescriptor (org.apache.hadoop.hbase.client.TableDescriptor)1 MasterServices (org.apache.hadoop.hbase.master.MasterServices)1 SplitWALManager (org.apache.hadoop.hbase.master.SplitWALManager)1 AssignmentManager (org.apache.hadoop.hbase.master.assignment.AssignmentManager)1 RegionStateNode (org.apache.hadoop.hbase.master.assignment.RegionStateNode)1 TransitRegionStateProcedure (org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure)1 ClaimReplicationQueuesProcedure (org.apache.hadoop.hbase.master.replication.ClaimReplicationQueuesProcedure)1 FailedRemoteDispatchException (org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException)1