use of org.apache.hadoop.hbase.YouAreDeadException in project hbase by apache.
the class HRegionServer method tryRegionServerReport.
/**
 * Builds a load report for the given interval and sends it to the master through the current
 * region server status stub.
 * <p>
 * If no stub is available the call is a no-op. If the RPC fails with anything other than a
 * {@link YouAreDeadException}, the stub is dropped (when it is still the one used for this call)
 * and a new one is created.
 *
 * @param reportStartTime start of the interval covered by the report, passed to
 *          {@code buildServerLoad}
 * @param reportEndTime end of the interval covered by the report, passed to
 *          {@code buildServerLoad}
 * @throws IOException a {@link YouAreDeadException} when the master rejects the report with one;
 *           the main loop in {@code run()} treats it as fatal
 */
@VisibleForTesting
protected void tryRegionServerReport(long reportStartTime, long reportEndTime) throws IOException {
  // Work on a snapshot so the null check, the RPC and the identity comparison below all
  // refer to the same stub instance.
  final RegionServerStatusService.BlockingInterface stub = rssStub;
  if (stub == null) {
    // Nothing to report through; the current server could be stopping.
    return;
  }

  final ClusterStatusProtos.ServerLoad load = buildServerLoad(reportStartTime, reportEndTime);
  try {
    final ServerName self =
      ServerName.parseVersionedServerName(this.serverName.getVersionedBytes());
    stub.regionServerReport(null,
      RegionServerReportRequest.newBuilder()
        .setServer(ProtobufUtil.toServerName(self))
        .setLoad(load)
        .build());
  } catch (ServiceException se) {
    final IOException remote = ProtobufUtil.getRemoteException(se);
    if (remote instanceof YouAreDeadException) {
      // Propagate: run() catches this and aborts the server.
      throw remote;
    }
    // Only clear the shared field if it still points at the stub that just failed.
    if (rssStub == stub) {
      rssStub = null;
    }
    // Couldn't connect to the master: get its location from zk and reconnect.
    // Per the original note, this blocks until a new master is found or we are stopped.
    createRegionServerStatusStub(true);
  }
}
use of org.apache.hadoop.hbase.YouAreDeadException in project hbase by apache.
the class TestAssignmentManagerBase method doCrash.
/**
 * Simulates the crash of {@code serverName}: moves it from the online to the dead servers,
 * submits a server crash for it, and then reports a brand new server to the server manager.
 *
 * @param serverName the server to crash
 */
protected void doCrash(final ServerName serverName) {
  this.master.getServerManager().moveFromOnlineToDeadServers(serverName);
  // Both flags are false; the original notes "No WALs here".
  this.am.submitServerCrash(serverName, false, false);

  // Add a new server to avoid killing all the region servers, which may hang the UTs.
  // Each replacement gets its own port derived from the running counter.
  final int replacementPort = 10000 + newRsAdded;
  final ServerName replacement = ServerName.valueOf("localhost", replacementPort, 1);
  newRsAdded++;

  try {
    this.master.getServerManager().regionServerReport(
      replacement,
      ServerMetricsBuilder.newBuilder(replacement)
        .setLastReportTimestamp(EnvironmentEdgeManager.currentTime())
        .build());
  } catch (YouAreDeadException unexpected) {
    // Should not happen for a server that was only just introduced.
    throw new UncheckedIOException(unexpected);
  }
}
use of org.apache.hadoop.hbase.YouAreDeadException in project hbase by apache.
the class HRegionServer method run.
/**
 * The HRegionServer sticks in this loop until closed.
 * <p>
 * The method proceeds through these phases:
 * <ol>
 * <li>Pre-registration initialization; any failure aborts the server.</li>
 * <li>Registration with the master via {@code reportForDuty()}, retried with backoff for as
 * long as {@code keepLooping()} holds.</li>
 * <li>Start of the procedure and quota managers, if the server is still healthy.</li>
 * <li>The main loop: while the cluster is shutting down, user regions are closed and the
 * server stops once nothing but catalog regions remain idle; every {@code msgInterval} a load
 * report is sent through {@code tryRegionServerReport}.</li>
 * <li>The shutdown sequence, which always runs, including after a failed initialization or
 * an abort. For that reason every component is null-checked before it is stopped.</li>
 * </ol>
 * Any throwable escaping phases 2-4 (including {@link YouAreDeadException} from a rejected
 * report) aborts the server unless {@code rpcServices.checkOOME} claims it.
 */
@Override
public void run() {
  if (isStopped()) {
    LOG.info("Skipping run; stopped");
    return;
  }
  try {
    // Do pre-registration initializations; zookeeper, lease threads, etc.
    preRegistrationInitialization();
  } catch (Throwable e) {
    // Execution continues after abort(); the guards below skip the startup work and fall
    // through to the shutdown sequence.
    abort("Fatal exception during initialization", e);
  }
  try {
    if (!isStopped() && !isAborted()) {
      ShutdownHook.install(conf, dataFs, this, Thread.currentThread());
      // Initialize the RegionServerCoprocessorHost now that our ephemeral
      // node was created, in case any coprocessors want to use ZooKeeper
      this.rsHost = new RegionServerCoprocessorHost(this, this.conf);
      // Try and register with the Master; tell it we are here. Break if server is stopped or
      // the clusterup flag is down or hdfs went wacky. Once registered successfully, go ahead and
      // start up all Services. Use RetryCounter to get backoff in case Master is struggling to
      // come up.
      LOG.debug("About to register with Master.");
      // NOTE(review): arguments look like (max attempts, base sleep, max sleep of five
      // minutes in ms) -- confirm against RetryCounterFactory.
      RetryCounterFactory rcf =
        new RetryCounterFactory(Integer.MAX_VALUE, this.sleeper.getPeriod(), 1000 * 60 * 5);
      RetryCounter rc = rcf.create();
      while (keepLooping()) {
        RegionServerStartupResponse w = reportForDuty();
        if (w == null) {
          // A null response means registration did not succeed; back off and retry.
          long sleepTime = rc.getBackoffTimeAndIncrementAttempts();
          LOG.warn("reportForDuty failed; sleeping {} ms and then retrying.", sleepTime);
          this.sleeper.sleep(sleepTime);
        } else {
          handleReportForDutyResponse(w);
          break;
        }
      }
    }
    if (!isStopped() && isHealthy()) {
      // since the server is ready to run
      if (this.rspmHost != null) {
        this.rspmHost.start();
      }
      // Start the Quota Manager
      if (this.rsQuotaManager != null) {
        rsQuotaManager.start(getRpcServer().getScheduler());
      }
      if (this.rsSpaceQuotaManager != null) {
        this.rsSpaceQuotaManager.start();
      }
    }
    // We registered with the Master. Go into run mode.
    long lastMsg = EnvironmentEdgeManager.currentTime();
    // Write request count seen on the previous pass; -1 means "not sampled yet".
    long oldRequestCount = -1;
    // The main run loop.
    while (!isStopped() && isHealthy()) {
      if (!isClusterUp()) {
        if (onlineRegions.isEmpty()) {
          stop("Exiting; cluster shutdown set and not carrying any regions");
        } else if (!this.stopping) {
          // First pass after the cluster went down: start closing user regions.
          this.stopping = true;
          LOG.info("Closing user regions");
          closeUserRegions(isAborted());
        } else {
          boolean allUserRegionsOffline = areAllUserRegionsOffline();
          if (allUserRegionsOffline) {
            // Stop only once the write request count has not moved between two passes.
            // meta regions will be closed on our way out.
            if (oldRequestCount == getWriteRequestCount()) {
              stop("Stopped; only catalog regions remaining online");
              break;
            }
            oldRequestCount = getWriteRequestCount();
          } else {
            // Make sure all regions have been closed -- some regions may
            // have not got it because we were splitting at the time of
            // the call to closeUserRegions.
            closeUserRegions(this.abortRequested.get());
          }
          LOG.debug("Waiting on " + getOnlineRegionsAsPrintableString());
        }
      }
      long now = EnvironmentEdgeManager.currentTime();
      if ((now - lastMsg) >= msgInterval) {
        tryRegionServerReport(lastMsg, now);
        // Re-read the clock: the report itself may have taken a while.
        lastMsg = EnvironmentEdgeManager.currentTime();
      }
      if (!isStopped() && !isAborted()) {
        this.sleeper.sleep();
      }
    }
    // end of the main run loop
  } catch (Throwable t) {
    if (!rpcServices.checkOOME(t)) {
      // A YouAreDeadException is an expected way to die, so it is not labelled "Unhandled".
      String prefix = t instanceof YouAreDeadException ? "" : "Unhandled: ";
      abort(prefix + t.getMessage(), t);
    }
  }

  // ---- Shutdown sequence. Runs on every exit path from the code above. ----
  if (this.leaseManager != null) {
    this.leaseManager.closeAfterLeasesExpire();
  }
  if (this.splitLogWorker != null) {
    splitLogWorker.stop();
  }
  stopInfoServer();
  // Send cache a shutdown.
  if (blockCache != null) {
    blockCache.shutdown();
  }
  if (mobFileCache != null) {
    mobFileCache.shutdown();
  }
  // TODO: Should we check they are alive? If OOME could have exited already
  if (this.hMemManager != null) {
    this.hMemManager.stop();
  }
  if (this.cacheFlusher != null) {
    this.cacheFlusher.interruptIfNecessary();
  }
  if (this.compactSplitThread != null) {
    this.compactSplitThread.interruptIfNecessary();
  }
  // Stop the snapshot and other procedure handlers, forcefully killing all running tasks
  if (rspmHost != null) {
    rspmHost.stop(this.abortRequested.get() || this.killed);
  }
  if (this.killed) {
    // Just skip out w/o closing regions. Used when testing.
  } else if (abortRequested.get()) {
    if (this.dataFsOk) {
      // Don't leave any open file handles
      closeUserRegions(abortRequested.get());
    }
    LOG.info("aborting server " + this.serverName);
  } else {
    closeUserRegions(abortRequested.get());
    LOG.info("stopping server " + this.serverName);
  }
  // Null-checked like every other component in this sequence: this code also runs after a
  // failed preRegistrationInitialization(), in which case the manager may never have been
  // created. An NPE here would escape run() and skip the remaining cleanup (WAL shutdown,
  // ephemeral node removal, ZooKeeper close).
  if (regionReplicationBufferManager != null) {
    regionReplicationBufferManager.stop();
  }
  closeClusterConnection();
  // Closing the compactSplit thread before closing meta regions
  if (!this.killed && containsMetaTableRegions()) {
    if (!abortRequested.get() || this.dataFsOk) {
      if (this.compactSplitThread != null) {
        this.compactSplitThread.join();
        this.compactSplitThread = null;
      }
      closeMetaTableRegions(abortRequested.get());
    }
  }
  if (!this.killed && this.dataFsOk) {
    waitOnAllRegionsToClose(abortRequested.get());
    LOG.info("stopping server " + this.serverName + "; all regions closed.");
  }
  // Stop the quota manager
  if (rsQuotaManager != null) {
    rsQuotaManager.stop();
  }
  if (rsSpaceQuotaManager != null) {
    rsSpaceQuotaManager.stop();
    rsSpaceQuotaManager = null;
  }
  // flag may be changed when closing regions throws exception.
  if (this.dataFsOk) {
    shutdownWAL(!abortRequested.get());
  }
  // Make sure the proxy is down.
  if (this.rssStub != null) {
    this.rssStub = null;
  }
  if (this.lockStub != null) {
    this.lockStub = null;
  }
  if (this.rpcClient != null) {
    this.rpcClient.close();
  }
  if (this.leaseManager != null) {
    this.leaseManager.close();
  }
  if (this.pauseMonitor != null) {
    this.pauseMonitor.stop();
  }
  if (!killed) {
    stopServiceThreads();
  }
  if (this.rpcServices != null) {
    this.rpcServices.stop();
  }
  try {
    deleteMyEphemeralNode();
  } catch (KeeperException.NoNodeException nn) {
    // pass: the node is already gone, which is the state we want.
  } catch (KeeperException e) {
    LOG.warn("Failed deleting my ephemeral node", e);
  }
  // We may have failed to delete the znode at the previous step, but
  // we delete the file anyway: a second attempt to delete the znode is likely to fail again.
  ZNodeClearer.deleteMyEphemeralNodeOnDisk();
  closeZooKeeper();
  LOG.info("Exiting; stopping=" + this.serverName + "; zookeeper connection closed.");
}
use of org.apache.hadoop.hbase.YouAreDeadException in project hbase by apache.
the class ServerManager method checkIsDead.
/**
 * Called when a RegionServer first reports in for duty and thereafter each time it heartbeats,
 * to make sure it has not been figured for dead.
 * <p>
 * If this exact server is on the dead list, it is rejected with a {@link YouAreDeadException}.
 * Otherwise, if a previous instance of it is on the dead list, that old entry is removed.
 *
 * @param serverName the server that is checking in
 * @param what START or REPORT; used only in the log and exception messages
 * @throws YouAreDeadException if {@code serverName} is currently on the dead servers list
 */
private void checkIsDead(final ServerName serverName, final String what) throws YouAreDeadException {
  final boolean knownDead = this.deadservers.isDeadServer(serverName);
  if (knownDead) {
    // Exact match: host name, port and start code all match with an existing one of the
    // dead servers. So, this server must be dead. Tell it to kill itself.
    final String message =
      "Server " + what + " rejected; currently processing " + serverName + " as dead server";
    LOG.debug(message);
    throw new YouAreDeadException(message);
  }

  // Clean up a previous instance of this server only when there is no master reference or the
  // master has finished initializing. See HBASE-5916 for more information.
  final boolean masterReady = this.master == null || this.master.isInitialized();
  if (masterReady && this.deadservers.cleanPreviousInstance(serverName)) {
    // This server has now become alive after we marked it as dead.
    // We removed its previous entry from the dead list to reflect it.
    LOG.debug("{} {} came back up, removed it from the dead servers list", what, serverName);
  }
}
use of org.apache.hadoop.hbase.YouAreDeadException in project hbase by apache.
the class TestRogueRSAssignment method testReportRSWithWrongRegion.
/**
 * Sends the master a region server report, from a made-up server, that lists a region of a
 * freshly created table, and expects the call to fail with a {@link YouAreDeadException}
 * wrapped in a {@code ServiceException}.
 * <p>
 * Ignore this test, see HBASE-21421
 */
@Test
@Ignore
public void testReportRSWithWrongRegion() throws Exception {
  final TableName tableName = TableName.valueOf(this.name.getMethodName());
  final List<RegionInfo> tableRegions = createTable(tableName);

  // Round-trip the made-up server name through its versioned byte form.
  final ServerName madeUp =
    ServerName.valueOf("1.example.org", 1, EnvironmentEdgeManager.currentTime());
  final ServerName sn = ServerName.parseVersionedServerName(madeUp.getVersionedBytes());

  // make fake request with a region assigned to different RS
  final RegionServerStatusProtos.RegionServerReportRequest.Builder fakeReport =
    makeRSReportRequestWithRegions(sn, tableRegions.get(1));

  // sending fake request to master
  // TODO: replace YouAreDeadException with appropriate exception as and when necessary
  exception.expect(ServiceException.class);
  exception.expectCause(isA(YouAreDeadException.class));
  // The response is irrelevant: the call is expected to throw.
  master.getMasterRpcServices().regionServerReport(null, fakeReport.build());
}
Aggregations