use of org.apache.hadoop.hbase.HRegionInfo in project hbase by apache.
the class MasterMetaBootstrap method assignMeta.
/**
* Check <code>hbase:meta</code> is assigned. If not, assign it.
*/
protected void assignMeta(Set<ServerName> previouslyFailedMetaRSs, int replicaId) throws InterruptedException, IOException, KeeperException {
final AssignmentManager assignmentManager = master.getAssignmentManager();
// Work on meta region
int assigned = 0;
long timeout = master.getConfiguration().getLong("hbase.catalog.verification.timeout", 1000);
if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
status.setStatus("Assigning hbase:meta region");
} else {
status.setStatus("Assigning hbase:meta region, replicaId " + replicaId);
}
// Get current meta state from zk.
RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper(), replicaId);
HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId);
RegionStates regionStates = assignmentManager.getRegionStates();
regionStates.createRegionState(hri, metaState.getState(), metaState.getServerName(), null);
if (!metaState.isOpened() || !master.getMetaTableLocator().verifyMetaRegionLocation(master.getClusterConnection(), master.getZooKeeper(), timeout, replicaId)) {
ServerName currentMetaServer = metaState.getServerName();
if (master.getServerManager().isServerOnline(currentMetaServer)) {
if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
LOG.info("Meta was in transition on " + currentMetaServer);
} else {
LOG.info("Meta with replicaId " + replicaId + " was in transition on " + currentMetaServer);
}
assignmentManager.processRegionsInTransition(Collections.singletonList(metaState));
} else {
if (currentMetaServer != null) {
if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
splitMetaLogBeforeAssignment(currentMetaServer);
regionStates.logSplit(HRegionInfo.FIRST_META_REGIONINFO);
previouslyFailedMetaRSs.add(currentMetaServer);
}
}
LOG.info("Re-assigning hbase:meta with replicaId, " + replicaId + " it was on " + currentMetaServer);
assignmentManager.assignMeta(hri);
}
assigned++;
}
if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) {
// TODO: should we prevent from using state manager before meta was initialized?
// tableStateManager.start();
master.getTableStateManager().setTableState(TableName.META_TABLE_NAME, TableState.State.ENABLED);
}
if ((RecoveryMode.LOG_REPLAY == master.getMasterWalManager().getLogRecoveryMode()) && (!previouslyFailedMetaRSs.isEmpty())) {
// replay WAL edits mode need new hbase:meta RS is assigned firstly
status.setStatus("replaying log for Meta Region");
master.getMasterWalManager().splitMetaLog(previouslyFailedMetaRSs);
}
assignmentManager.setEnabledTable(TableName.META_TABLE_NAME);
master.getTableStateManager().start();
// No need to wait for meta is assigned = 0 when meta is just verified.
if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID)
enableCrashedServerProcessing(assigned != 0);
LOG.info("hbase:meta with replicaId " + replicaId + " assigned=" + assigned + ", location=" + master.getMetaTableLocator().getMetaRegionLocation(master.getZooKeeper(), replicaId));
status.setStatus("META assigned.");
}
use of org.apache.hadoop.hbase.HRegionInfo in project hbase by apache.
the class AssignmentManager method onRegionMergeReverted.
private String onRegionMergeReverted(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) {
// If the region is in offline state, it could be an RPC retry.
if (current == null || !current.isMergingNewOrOfflineOnServer(serverName)) {
return hri.getShortNameToLog() + " is not merging on " + serverName;
}
// Just return in case of retrying
if (current.isOffline()) {
return null;
}
final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1));
final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2));
RegionState rs_a = regionStates.getRegionState(a);
RegionState rs_b = regionStates.getRegionState(b);
if (rs_a == null || !rs_a.isMergingOnServer(serverName) || rs_b == null || !rs_b.isMergingOnServer(serverName)) {
return "Some daughter is not known to be merging on " + serverName + ", a=" + rs_a + ", b=" + rs_b;
}
// Always bring the children back online. Even if they are not offline
// there's no harm in making them online again.
regionOnline(a, serverName);
regionOnline(b, serverName);
// Only offline the merging region if it is known to exist.
RegionState rs_p = regionStates.getRegionState(hri);
if (rs_p != null) {
regionOffline(hri);
}
if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING)) {
invokeUnAssign(a);
invokeUnAssign(b);
}
return null;
}
use of org.apache.hadoop.hbase.HRegionInfo in project hbase by apache.
the class AssignmentManager method assign.
/**
* Bulk assign regions to <code>destination</code>.
* @param destination
* @param regions Regions to assign.
* @return true if successful
*/
boolean assign(final ServerName destination, final List<HRegionInfo> regions) throws InterruptedException {
long startTime = EnvironmentEdgeManager.currentTime();
try {
int regionCount = regions.size();
if (regionCount == 0) {
return true;
}
LOG.info("Assigning " + regionCount + " region(s) to " + destination.toString());
Set<String> encodedNames = new HashSet<>(regionCount);
for (HRegionInfo region : regions) {
encodedNames.add(region.getEncodedName());
}
List<HRegionInfo> failedToOpenRegions = new ArrayList<>();
Map<String, Lock> locks = locker.acquireLocks(encodedNames);
try {
Map<String, RegionPlan> plans = new HashMap<>(regionCount);
List<RegionState> states = new ArrayList<>(regionCount);
for (HRegionInfo region : regions) {
String encodedName = region.getEncodedName();
if (!isDisabledorDisablingRegionInRIT(region)) {
RegionState state = forceRegionStateToOffline(region, false);
boolean onDeadServer = false;
if (state != null) {
if (regionStates.wasRegionOnDeadServer(encodedName)) {
LOG.info("Skip assigning " + region.getRegionNameAsString() + ", it's host " + regionStates.getLastRegionServerOfRegion(encodedName) + " is dead but not processed yet");
onDeadServer = true;
} else {
RegionPlan plan = new RegionPlan(region, state.getServerName(), destination);
plans.put(encodedName, plan);
states.add(state);
continue;
}
}
// Reassign if the region wasn't on a dead server
if (!onDeadServer) {
LOG.info("failed to force region state to offline, " + "will reassign later: " + region);
// assign individually later
failedToOpenRegions.add(region);
}
}
// Release the lock, this region is excluded from bulk assign because
// we can't update its state, or set its znode to offline.
Lock lock = locks.remove(encodedName);
lock.unlock();
}
if (server.isStopped()) {
return false;
}
// Add region plans, so we can updateTimers when one region is opened so
// that unnecessary timeout on RIT is reduced.
this.addPlans(plans);
List<Pair<HRegionInfo, List<ServerName>>> regionOpenInfos = new ArrayList<>(states.size());
for (RegionState state : states) {
HRegionInfo region = state.getRegion();
regionStates.updateRegionState(region, State.PENDING_OPEN, destination);
List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
if (shouldAssignFavoredNodes(region)) {
favoredNodes = server.getFavoredNodesManager().getFavoredNodesWithDNPort(region);
}
regionOpenInfos.add(new Pair<>(region, favoredNodes));
}
// Move on to open regions.
try {
// Send OPEN RPC. If it fails on a IOE or RemoteException,
// regions will be assigned individually.
Configuration conf = server.getConfiguration();
long maxWaitTime = System.currentTimeMillis() + conf.getLong("hbase.regionserver.rpc.startup.waittime", 60000);
for (int i = 1; i <= maximumAttempts && !server.isStopped(); i++) {
try {
List<RegionOpeningState> regionOpeningStateList = serverManager.sendRegionOpen(destination, regionOpenInfos);
for (int k = 0, n = regionOpeningStateList.size(); k < n; k++) {
RegionOpeningState openingState = regionOpeningStateList.get(k);
if (openingState != RegionOpeningState.OPENED) {
HRegionInfo region = regionOpenInfos.get(k).getFirst();
LOG.info("Got opening state " + openingState + ", will reassign later: " + region);
// Failed opening this region, reassign it later
forceRegionStateToOffline(region, true);
failedToOpenRegions.add(region);
}
}
break;
} catch (IOException e) {
if (e instanceof RemoteException) {
e = ((RemoteException) e).unwrapRemoteException();
}
if (e instanceof RegionServerStoppedException) {
LOG.warn("The region server was shut down, ", e);
// No need to retry, the region server is a goner.
return false;
} else if (e instanceof ServerNotRunningYetException) {
long now = System.currentTimeMillis();
if (now < maxWaitTime) {
if (LOG.isDebugEnabled()) {
LOG.debug("Server is not yet up; waiting up to " + (maxWaitTime - now) + "ms", e);
}
Thread.sleep(100);
// reset the try count
i--;
continue;
}
} else if (e instanceof java.net.SocketTimeoutException && this.serverManager.isServerOnline(destination)) {
// open the region on the same server.
if (LOG.isDebugEnabled()) {
LOG.debug("Bulk assigner openRegion() to " + destination + " has timed out, but the regions might" + " already be opened on it.", e);
}
// wait and reset the re-try count, server might be just busy.
Thread.sleep(100);
i--;
continue;
} else if (e instanceof FailedServerException && i < maximumAttempts) {
// In case the server is in the failed server list, no point to
// retry too soon. Retry after the failed_server_expiry time
long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
if (LOG.isDebugEnabled()) {
LOG.debug(destination + " is on failed server list; waiting " + sleepTime + "ms", e);
}
Thread.sleep(sleepTime);
continue;
}
throw e;
}
}
} catch (IOException e) {
// Can be a socket timeout, EOF, NoRouteToHost, etc
LOG.info("Unable to communicate with " + destination + " in order to assign regions, ", e);
for (RegionState state : states) {
HRegionInfo region = state.getRegion();
forceRegionStateToOffline(region, true);
}
return false;
}
} finally {
for (Lock lock : locks.values()) {
lock.unlock();
}
}
if (!failedToOpenRegions.isEmpty()) {
for (HRegionInfo region : failedToOpenRegions) {
if (!regionStates.isRegionOnline(region)) {
invokeAssign(region);
}
}
}
// wait for assignment completion
ArrayList<HRegionInfo> userRegionSet = new ArrayList<>(regions.size());
for (HRegionInfo region : regions) {
if (!region.getTable().isSystemTable()) {
userRegionSet.add(region);
}
}
if (!waitForAssignment(userRegionSet, true, userRegionSet.size(), System.currentTimeMillis())) {
LOG.debug("some user regions are still in transition: " + userRegionSet);
}
LOG.debug("Bulk assigning done for " + destination);
return true;
} finally {
metricsAssignmentManager.updateBulkAssignTime(EnvironmentEdgeManager.currentTime() - startTime);
}
}
use of org.apache.hadoop.hbase.HRegionInfo in project hbase by apache.
the class AssignmentManager method processRegionsInTransition.
/**
* Processes list of regions in transition at startup
*/
void processRegionsInTransition(Collection<RegionState> regionsInTransition) {
// to the region if the master dies right after the RPC call is out.
for (RegionState regionState : regionsInTransition) {
LOG.info("Processing " + regionState);
ServerName serverName = regionState.getServerName();
// case, try assigning it here.
if (serverName != null && !serverManager.getOnlineServers().containsKey(serverName)) {
LOG.info("Server " + serverName + " isn't online. SSH will handle this");
// SSH will handle it
continue;
}
HRegionInfo regionInfo = regionState.getRegion();
RegionState.State state = regionState.getState();
switch(state) {
case CLOSED:
invokeAssign(regionState.getRegion());
break;
case PENDING_OPEN:
retrySendRegionOpen(regionState);
break;
case PENDING_CLOSE:
retrySendRegionClose(regionState);
break;
case FAILED_CLOSE:
case FAILED_OPEN:
invokeUnAssign(regionInfo);
break;
default:
// No process for other states
break;
}
}
}
use of org.apache.hadoop.hbase.HRegionInfo in project hbase by apache.
the class AssignmentManager method onRegionMerged.
private String onRegionMerged(final RegionState current, final HRegionInfo hri, final ServerName serverName, final RegionStateTransition transition) {
// it could be a reportRegionTransition RPC retry.
if (current == null || !current.isMergingNewOrOpenedOnServer(serverName)) {
return hri.getShortNameToLog() + " is not merging on " + serverName;
}
// Just return in case of retrying
if (current.isOpened()) {
return null;
}
final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1));
final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2));
RegionState rs_a = regionStates.getRegionState(a);
RegionState rs_b = regionStates.getRegionState(b);
if (rs_a == null || !rs_a.isMergingOnServer(serverName) || rs_b == null || !rs_b.isMergingOnServer(serverName)) {
return "Some daughter is not known to be merging on " + serverName + ", a=" + rs_a + ", b=" + rs_b;
}
regionOffline(a, State.MERGED);
regionOffline(b, State.MERGED);
regionOnline(hri, serverName, 1);
try {
processFavoredNodesForMerge(hri, a, b);
} catch (IOException e) {
LOG.error("Error while processing favored nodes after merge.", e);
return StringUtils.stringifyException(e);
}
// User could disable the table before master knows the new region.
if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING)) {
invokeUnAssign(hri);
} else {
Callable<Object> mergeReplicasCallable = new Callable<Object>() {
@Override
public Object call() {
doMergingOfReplicas(hri, a, b);
return null;
}
};
threadPoolExecutorService.submit(mergeReplicasCallable);
}
return null;
}
Aggregations