Use of org.apache.hadoop.hbase.master.replication.ClaimReplicationQueuesProcedure in project hbase by apache.
Class ServerCrashProcedure, method executeFromState.
@Override
protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state)
    throws ProcedureSuspendedException, ProcedureYieldException {
  final MasterServices services = env.getMasterServices();
  final AssignmentManager am = env.getAssignmentManager();
  updateProgress(true);
  // Server gets removed from processing list below on procedure successful finish.
  if (!notifiedDeadServer) {
    notifiedDeadServer = true;
  }
  switch (state) {
    case SERVER_CRASH_START:
    case SERVER_CRASH_SPLIT_META_LOGS:
    case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR:
    case SERVER_CRASH_ASSIGN_META:
      break;
    default:
      // If hbase:meta is not assigned, yield.
      if (env.getAssignmentManager().waitMetaLoaded(this)) {
        throw new ProcedureSuspendedException();
      }
  }
  try {
    switch (state) {
      case SERVER_CRASH_START:
        LOG.info("Start " + this);
        // If carrying meta, process it first. Else, get list of regions on crashed server.
        if (this.carryingMeta) {
          setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
        } else {
          setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
        }
        break;
      case SERVER_CRASH_SPLIT_META_LOGS:
        if (env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
          DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)) {
          zkCoordinatedSplitMetaLogs(env);
          setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
        } else {
          am.getRegionStates().metaLogSplitting(serverName);
          addChildProcedure(createSplittingWalProcedures(env, true));
          setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR);
        }
        break;
      case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR:
        if (isSplittingDone(env, true)) {
          setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META);
          am.getRegionStates().metaLogSplit(serverName);
        } else {
          setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS);
        }
        break;
      case SERVER_CRASH_ASSIGN_META:
        assignRegions(env, Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO));
        setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
        break;
      case SERVER_CRASH_GET_REGIONS:
        this.regionsOnCrashedServer = getRegionsOnCrashedServer(env);
        // if we should do distributed log splitting.
        if (regionsOnCrashedServer != null) {
          LOG.info("{} had {} regions", serverName, regionsOnCrashedServer.size());
          if (LOG.isTraceEnabled()) {
            this.regionsOnCrashedServer.stream().forEach(ri -> LOG.trace(ri.getShortNameToLog()));
          }
        }
        if (!this.shouldSplitWal) {
          setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
        } else {
          setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
        }
        break;
      case SERVER_CRASH_SPLIT_LOGS:
        if (env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK,
          DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK)) {
          zkCoordinatedSplitLogs(env);
          setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
        } else {
          am.getRegionStates().logSplitting(this.serverName);
          addChildProcedure(createSplittingWalProcedures(env, false));
          setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_WALS_DIR);
        }
        break;
      case SERVER_CRASH_DELETE_SPLIT_WALS_DIR:
        if (isSplittingDone(env, false)) {
          cleanupSplitDir(env);
          setNextState(ServerCrashState.SERVER_CRASH_ASSIGN);
          am.getRegionStates().logSplit(this.serverName);
        } else {
          setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS);
        }
        break;
      case SERVER_CRASH_ASSIGN:
        // Filter changes this.regionsOnCrashedServer.
        if (filterDefaultMetaRegions()) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("Assigning regions " + RegionInfo.getShortNameToLog(regionsOnCrashedServer) +
              ", " + this + "; cycles=" + getCycles());
          }
          assignRegions(env, regionsOnCrashedServer);
        }
        setNextState(ServerCrashState.SERVER_CRASH_CLAIM_REPLICATION_QUEUES);
        break;
      case SERVER_CRASH_HANDLE_RIT2:
        // Noop. Left in place because we used to call handleRIT here for a second time
        // but no longer necessary since HBASE-20634.
        setNextState(ServerCrashState.SERVER_CRASH_CLAIM_REPLICATION_QUEUES);
        break;
      case SERVER_CRASH_CLAIM_REPLICATION_QUEUES:
        addChildProcedure(new ClaimReplicationQueuesProcedure(serverName));
        setNextState(ServerCrashState.SERVER_CRASH_FINISH);
        break;
      case SERVER_CRASH_FINISH:
        LOG.info("removed crashed server {} after splitting done", serverName);
        services.getAssignmentManager().getRegionStates().removeServer(serverName);
        updateProgress(true);
        return Flow.NO_MORE_STATE;
      default:
        throw new UnsupportedOperationException("unhandled state=" + state);
    }
  } catch (IOException e) {
    LOG.warn("Failed state=" + state + ", retry " + this + "; cycles=" + getCycles(), e);
  }
  return Flow.HAS_MORE_STATE;
}
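In the SERVER_CRASH_CLAIM_REPLICATION_QUEUES state above, ClaimReplicationQueuesProcedure runs as a child procedure, and the test below asserts that it parks in WAITING_TIMEOUT while no region server is available to take over the queues. The following is a minimal sketch of that Procedure-v2 suspend-and-retry pattern, not the shipped ClaimReplicationQueuesProcedure code: the class name, the emptiness check on getOnlineServersList(), and the fixed 1-second backoff are illustrative assumptions.

// Sketch only: rollback/abort/serializeStateData overrides elided for brevity.
public class ClaimQueuesRetrySketch extends Procedure<MasterProcedureEnv> {

  @Override
  protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
      throws ProcedureSuspendedException {
    List<ServerName> online =
      env.getMasterServices().getServerManager().getOnlineServersList();
    if (online.isEmpty()) {
      // No live server can claim the queues yet: park in WAITING_TIMEOUT and retry later.
      setTimeout(1000); // illustrative backoff, in milliseconds
      setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
      throw new ProcedureSuspendedException();
    }
    // ... distribute the crashed server's replication queues to live servers ...
    return null; // done, no child procedures
  }

  @Override
  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
    // On timeout, go back to RUNNABLE so execute() runs again instead of failing.
    setState(ProcedureProtos.ProcedureState.RUNNABLE);
    env.getProcedureScheduler().addFront(this);
    return false; // timeout is not a failure here
  }
}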
Use of org.apache.hadoop.hbase.master.replication.ClaimReplicationQueuesProcedure in project hbase by apache.
Class TestClaimReplicationQueue, method testClaim.
@Test
public void testClaim() throws Exception {
  // disable the peers
  hbaseAdmin.disableReplicationPeer(PEER_ID2);
  hbaseAdmin.disableReplicationPeer(PEER_ID3);
  // put some data
  int count1 = UTIL1.loadTable(htable1, famName);
  int count2 = UTIL1.loadTable(table3, famName);
  EMPTY = true;
  UTIL1.getMiniHBaseCluster().stopRegionServer(0).join();
  UTIL1.getMiniHBaseCluster().startRegionServer();
  // Since there is no active region server able to claim the replication queues, the
  // procedure should spend most of its time in WAITING_TIMEOUT state, retrying.
  HMaster master = UTIL1.getMiniHBaseCluster().getMaster();
  UTIL1.waitFor(30000,
    () -> master.getProcedures().stream()
      .filter(p -> p instanceof ClaimReplicationQueuesProcedure)
      .anyMatch(p -> p.getState() == ProcedureState.WAITING_TIMEOUT));
  hbaseAdmin.enableReplicationPeer(PEER_ID2);
  hbaseAdmin.enableReplicationPeer(PEER_ID3);
  EMPTY = false;
  // Wait until the SCP finishes; ClaimReplicationQueuesProcedure is a sub-procedure of SCP.
  UTIL1.waitFor(30000,
    () -> master.getProcedures().stream()
      .filter(p -> p instanceof ServerCrashProcedure)
      .allMatch(Procedure::isSuccess));
  // We should get all the data in the target cluster.
  waitForReplication(htable2, count1, NB_RETRIES);
  waitForReplication(table4, count2, NB_RETRIES);
}
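The waitForReplication helper comes from the replication test base class and is not shown in this listing. A hypothetical equivalent, assuming the standard org.apache.hadoop.hbase.client scan API and an illustrative 1-second poll interval, might look like this:

// Hypothetical stand-in for the test base's waitForReplication: poll the sink
// table until the expected row count shows up or the retries run out.
// Assumes org.apache.hadoop.hbase.client.{Table, Scan, Result, ResultScanner}.
private static void waitForReplication(Table table, int expectedRows, int retries)
    throws Exception {
  for (int i = 0; i < retries; i++) {
    int rows = 0;
    try (ResultScanner scanner = table.getScanner(new Scan())) {
      for (Result r : scanner) {
        rows++;
      }
    }
    if (rows >= expectedRows) {
      return; // replication has caught up
    }
    Thread.sleep(1000); // illustrative poll interval
  }
  throw new AssertionError("replication did not catch up in time");
}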