use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
the class AssignmentManager method retrySendRegionOpen.
/**
* At master failover, for a region in the pending_open state, make sure
* the sendRegionOpen RPC call is sent to the target regionserver.
* <p>
* The work is wrapped in an {@link EventHandler} of type
* {@code M_MASTER_RECOVERY} and submitted to {@code executorService}. The
* handler takes the region's lock (keyed by encoded region name), holds it for
* the whole retry loop, and makes up to {@code maximumAttempts} attempts. If
* the attempts run out, or the handler is interrupted while waiting for the
* failed-server entry to expire, the region is moved to {@code FAILED_OPEN}.
*
* @param regionState the region state captured by the caller; it supplies the
*          region and the target server, and retrying is abandoned as soon as
*          {@code regionStates} no longer reports an equal state for the region
*/
private void retrySendRegionOpen(final RegionState regionState) {
this.executorService.submit(new EventHandler(server, EventType.M_MASTER_RECOVERY) {
@Override
public void process() throws IOException {
HRegionInfo hri = regionState.getRegion();
ServerName serverName = regionState.getServerName();
// Take the per-region lock (keyed by encoded name); released in the finally block below.
ReentrantLock lock = locker.acquireLock(hri.getEncodedName());
try {
for (int i = 1; i <= maximumAttempts; i++) {
if (!serverManager.isServerOnline(serverName) || server.isStopped() || server.isAborted()) {
// Target server is no longer online, or this server is stopped/aborted:
// no need to retry any more. The region state is left untouched here.
return;
}
try {
if (!regionState.equals(regionStates.getRegionState(hri))) {
// Region is not in the expected state any more
return;
}
// Default to the empty list; only look up favored nodes when they apply to this region.
List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
if (shouldAssignFavoredNodes(hri)) {
FavoredNodesManager fnm = ((MasterServices) server).getFavoredNodesManager();
favoredNodes = fnm.getFavoredNodesWithDNPort(hri);
}
serverManager.sendRegionOpen(serverName, hri, favoredNodes);
// The RPC went through without throwing: we're done
return;
} catch (Throwable t) {
// Unwrap so the instanceof checks below see the underlying exception type.
if (t instanceof RemoteException) {
t = ((RemoteException) t).unwrapRemoteException();
}
if (t instanceof FailedServerException && i < maximumAttempts) {
// Retried too soon: the target is still on the failed-server list.
// Retry after the failed_server_expiry time (only if attempts remain).
try {
Configuration conf = this.server.getConfiguration();
// Sleep 1 longer than the configured expiry value.
long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
if (LOG.isDebugEnabled()) {
LOG.debug(serverName + " is on failed server list; waiting " + sleepTime + "ms", t);
}
Thread.sleep(sleepTime);
// Go straight to the next attempt, skipping the short pause at the bottom of the catch.
continue;
} catch (InterruptedException ie) {
// Interrupted while waiting: give up, mark the region FAILED_OPEN and
// restore the interrupt flag for whoever owns this thread.
LOG.warn("Failed to assign " + hri.getRegionNameAsString() + " since interrupted", ie);
regionStates.updateRegionState(hri, State.FAILED_OPEN);
Thread.currentThread().interrupt();
return;
}
}
if (serverManager.isServerOnline(serverName) && t instanceof java.net.SocketTimeoutException) {
// Timed out while the server is still online: do not count this attempt.
// The decrement here is cancelled by the loop's i++, and nothing is logged.
i--;
} else {
LOG.info("Got exception in retrying sendRegionOpen for " + regionState + "; try=" + i + " of " + maximumAttempts, t);
}
// Short pause before the next attempt (100, presumably milliseconds; confirm in Threads.sleep).
Threads.sleep(100);
}
}
// Run out of attempts
regionStates.updateRegionState(hri, State.FAILED_OPEN);
} finally {
lock.unlock();
}
}
});
}
use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
the class MasterRpcServices method unassignRegion.
/**
 * RPC entry point that unassigns a single region.
 * <p>
 * The region is looked up by the name carried in the request; the first
 * <code>hbase:meta</code> region is special-cased and resolved through the meta
 * table locator instead of the meta table lookup result. If a coprocessor host
 * is present, its {@code preUnassign} hook may veto the operation (in which case
 * the empty response is returned without unassigning) and its
 * {@code postUnassign} hook runs after the unassign call.
 * <p>
 * Note that {@code force} is only handed to the coprocessor hooks and logged; it
 * is not passed to {@code AssignmentManager.unassign}.
 *
 * @param controller the RPC controller (not used by this method)
 * @param req the request carrying the region specifier and the force flag
 * @return an empty {@link UnassignRegionResponse}
 * @throws ServiceException wrapping any {@link IOException} raised while
 *           processing the request, including the {@code UnknownRegionException}
 *           thrown here when no region matches the requested name
 */
@Override
public UnassignRegionResponse unassignRegion(RpcController controller, UnassignRegionRequest req) throws ServiceException {
  try {
    final byte[] regionName = req.getRegion().getValue().toByteArray();
    RegionSpecifierType type = req.getRegion().getType();
    final boolean force = req.getForce();
    UnassignRegionResponse urr = UnassignRegionResponse.newBuilder().build();
    master.checkInitialized();
    // A non-REGION_NAME specifier is only warned about; the bytes are still treated as a region name.
    if (type != RegionSpecifierType.REGION_NAME) {
      LOG.warn("unassignRegion specifier type: expected: " + RegionSpecifierType.REGION_NAME + " actual: " + type);
    }
    Pair<HRegionInfo, ServerName> pair = MetaTableAccessor.getRegion(master.getConnection(), regionName);
    // The first meta region is resolved through the meta table locator, overriding the lookup above.
    if (Bytes.equals(HRegionInfo.FIRST_META_REGIONINFO.getRegionName(), regionName)) {
      pair = new Pair<>(HRegionInfo.FIRST_META_REGIONINFO, master.getMetaTableLocator().getMetaRegionLocation(master.getZooKeeper()));
    }
    // Single null check; the original repeated this check twice in a row.
    if (pair == null) {
      throw new UnknownRegionException(Bytes.toString(regionName));
    }
    HRegionInfo hri = pair.getFirst();
    // A coprocessor may bypass the unassign entirely.
    if (master.cpHost != null && master.cpHost.preUnassign(hri, force)) {
      return urr;
    }
    LOG.debug(master.getClientIdAuditPrefix() + " unassign " + hri.getRegionNameAsString() + " in current location if it is online and reassign.force=" + force);
    master.getAssignmentManager().unassign(hri);
    if (master.cpHost != null) {
      master.cpHost.postUnassign(hri, force);
    }
    return urr;
  } catch (IOException ioe) {
    throw new ServiceException(ioe);
  }
}
use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
the class MasterRpcServices method listDrainingRegionServers.
/**
 * RPC entry point that reports the region servers currently marked as draining.
 * <p>
 * After checking that the master is initialized, every server returned by
 * {@code master.listDrainingRegionServers()} is converted with
 * {@code ProtobufUtil.toServerName} and added to the response.
 *
 * @param controller the RPC controller (not used by this method)
 * @param request the request message (not read by this method)
 * @return a response holding one server-name entry per draining server
 * @throws ServiceException wrapping any {@link IOException} raised by the master
 */
@Override
public ListDrainingRegionServersResponse listDrainingRegionServers(RpcController controller, ListDrainingRegionServersRequest request) throws ServiceException {
  final ListDrainingRegionServersResponse.Builder builder = ListDrainingRegionServersResponse.newBuilder();
  try {
    master.checkInitialized();
    for (ServerName draining : master.listDrainingRegionServers()) {
      builder.addServerName(ProtobufUtil.toServerName(draining));
    }
    return builder.build();
  } catch (IOException e) {
    throw new ServiceException(e);
  }
}
use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
the class MasterMetaBootstrap method splitMetaLogsBeforeAssignment.
/**
 * Prepares the sets of previously failed servers and splits the WAL of the old
 * <code>hbase:meta</code> host when that host is among them.
 * <p>
 * On return, {@code previouslyFailedServers} holds the servers found in the log
 * folders and {@code previouslyFailedMetaRSs} holds the union of the meta
 * servers recorded in ZooKeeper and {@code previouslyFailedServers}.
 *
 * @throws IOException if one of the underlying calls fails with it
 * @throws KeeperException if one of the underlying calls fails with it
 */
public void splitMetaLogsBeforeAssignment() throws IOException, KeeperException {
  // Collect the region servers that failed earlier and still need log splitting.
  // Only hbase:meta hosts are recovered during master initialization; the other
  // failed servers are left to SSH so the master node can come up ASAP.
  previouslyFailedServers = master.getMasterWalManager().getFailedServersFromLogFolders();

  // Split logs for the server that used to carry hbase:meta, if it is one of the failed ones.
  final ServerName formerMetaHost = master.getMetaTableLocator().getMetaRegionLocation(master.getZooKeeper());
  final boolean formerMetaHostFailed = formerMetaHost != null && previouslyFailedServers.contains(formerMetaHost);
  if (formerMetaHostFailed) {
    // The host is deliberately kept in previouslyFailedServers afterwards because it
    // may also host user regions.
    splitMetaLogBeforeAssignment(formerMetaHost);
  }

  // Use the union of the meta servers recorded in ZK and previouslyFailedServers, not the
  // ZK set alone, to cover two situations:
  // 1) chained failures (recovery failed several times in a row);
  // 2) the master was killed right before it could delete the recovering hbase:meta from ZK
  //    while the same server still has non-meta wals to replay, so
  //    removeStaleRecoveringRegionsFromZK can't delete the stale hbase:meta region.
  // Handing extra servers to splitMetaLog is fine: a server without an hbase:meta wal is a no-op.
  previouslyFailedMetaRSs = getPreviouselyFailedMetaServersFromZK();
  previouslyFailedMetaRSs.addAll(previouslyFailedServers);
}
use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
the class MasterMetaBootstrap method assignMeta.
/**
 * Checks that the given replica of <code>hbase:meta</code> is assigned and, if it is not,
 * assigns it.
 * <p>
 * The meta state is read from ZooKeeper and registered with the region states. When the
 * region is not opened, or its location cannot be verified, it is either processed as a
 * region in transition (last known server still online) or re-assigned (otherwise). For the
 * default replica the method also marks the meta table ENABLED and enables crashed-server
 * processing; in LOG_REPLAY recovery mode the meta logs of the failed servers are split
 * after the assignment step.
 *
 * @param previouslyFailedMetaRSs servers whose meta logs may need splitting; the last known
 *          meta server is added to it when the default replica has to be re-assigned
 * @param replicaId replica of <code>hbase:meta</code> to work on
 * @throws InterruptedException if one of the underlying calls fails with it
 * @throws IOException if one of the underlying calls fails with it
 * @throws KeeperException if one of the underlying calls fails with it
 */
protected void assignMeta(Set<ServerName> previouslyFailedMetaRSs, int replicaId) throws InterruptedException, IOException, KeeperException {
  final AssignmentManager am = master.getAssignmentManager();
  final boolean isDefaultReplica = replicaId == HRegionInfo.DEFAULT_REPLICA_ID;
  // Counts whether this call had to (re)assign meta; stays 0 when meta was merely verified.
  int assignedCount = 0;
  final long verificationTimeout = master.getConfiguration().getLong("hbase.catalog.verification.timeout", 1000);
  status.setStatus(isDefaultReplica ? "Assigning hbase:meta region" : "Assigning hbase:meta region, replicaId " + replicaId);

  // Read the current meta state from zk and register it with the region states.
  final RegionState zkMetaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper(), replicaId);
  final HRegionInfo metaReplica = RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId);
  final RegionStates states = am.getRegionStates();
  states.createRegionState(metaReplica, zkMetaState.getState(), zkMetaState.getServerName(), null);

  // Location verification is attempted only when the recorded state is already opened.
  final boolean openedAndVerified = zkMetaState.isOpened() && master.getMetaTableLocator().verifyMetaRegionLocation(master.getClusterConnection(), master.getZooKeeper(), verificationTimeout, replicaId);
  if (!openedAndVerified) {
    final ServerName lastKnownServer = zkMetaState.getServerName();
    if (master.getServerManager().isServerOnline(lastKnownServer)) {
      LOG.info(isDefaultReplica ? "Meta was in transition on " + lastKnownServer : "Meta with replicaId " + replicaId + " was in transition on " + lastKnownServer);
      am.processRegionsInTransition(Collections.singletonList(zkMetaState));
    } else {
      // Only the default replica triggers log splitting for the server it was on.
      if (lastKnownServer != null && isDefaultReplica) {
        splitMetaLogBeforeAssignment(lastKnownServer);
        states.logSplit(HRegionInfo.FIRST_META_REGIONINFO);
        previouslyFailedMetaRSs.add(lastKnownServer);
      }
      LOG.info("Re-assigning hbase:meta with replicaId, " + replicaId + " it was on " + lastKnownServer);
      am.assignMeta(metaReplica);
    }
    assignedCount = 1;
  }

  if (isDefaultReplica) {
    // TODO: should we prevent from using state manager before meta was initialized?
    // tableStateManager.start();
    master.getTableStateManager().setTableState(TableName.META_TABLE_NAME, TableState.State.ENABLED);
  }

  final boolean logReplayMode = RecoveryMode.LOG_REPLAY == master.getMasterWalManager().getLogRecoveryMode();
  if (logReplayMode && !previouslyFailedMetaRSs.isEmpty()) {
    // WAL-edit replay mode needs the new hbase:meta RS to be assigned first.
    status.setStatus("replaying log for Meta Region");
    master.getMasterWalManager().splitMetaLog(previouslyFailedMetaRSs);
  }

  am.setEnabledTable(TableName.META_TABLE_NAME);
  master.getTableStateManager().start();

  // When meta was only verified (count is 0) there is no need to wait for it to be assigned.
  if (isDefaultReplica) {
    enableCrashedServerProcessing(assignedCount != 0);
  }
  LOG.info("hbase:meta with replicaId " + replicaId + " assigned=" + assignedCount + ", location=" + master.getMetaTableLocator().getMetaRegionLocation(master.getZooKeeper(), replicaId));
  status.setStatus("META assigned.");
}
Aggregations