Search in sources :

Example 1 with YouAreDeadException

use of org.apache.hadoop.hbase.YouAreDeadException in project hbase by apache.

the class HRegionServer method tryRegionServerReport.

/**
 * Sends one load report, covering the given interval, to the master.
 * <p>
 * The status stub is read once into a local so that the null check, the RPC and the later
 * identity comparison all refer to the same stub instance. When no stub is available the
 * method returns without doing anything.
 *
 * @param reportStartTime start of the interval handed to {@code buildServerLoad}
 * @param reportEndTime end of the interval handed to {@code buildServerLoad}
 * @throws IOException a {@link YouAreDeadException} unwrapped from the RPC failure is rethrown
 *     as-is; the signature also allows whatever {@code buildServerLoad} or
 *     {@code createRegionServerStatusStub} may throw
 */
@VisibleForTesting
protected void tryRegionServerReport(long reportStartTime, long reportEndTime) throws IOException {
    final RegionServerStatusService.BlockingInterface masterStub = rssStub;
    if (masterStub == null) {
        // No stub to talk to; the current server could be stopping.
        return;
    }
    final ClusterStatusProtos.ServerLoad load = buildServerLoad(reportStartTime, reportEndTime);
    try {
        final ServerName self = ServerName.parseVersionedServerName(this.serverName.getVersionedBytes());
        final RegionServerReportRequest report = RegionServerReportRequest.newBuilder()
            .setServer(ProtobufUtil.toServerName(self))
            .setLoad(load)
            .build();
        masterStub.regionServerReport(null, report);
    } catch (ServiceException se) {
        final IOException remote = ProtobufUtil.getRemoteException(se);
        if (remote instanceof YouAreDeadException) {
            // Propagate unchanged: run() catches this and treats it as a fatal error.
            throw remote;
        }
        // Drop the cached stub only if it is still the one that just failed; if the field
        // already refers to a different stub, leave that one in place.
        if (rssStub == masterStub) {
            rssStub = null;
        }
        // Couldn't connect to the master: get its location from zk and reconnect.
        // The call blocks until a new master is found or we are stopped.
        createRegionServerStatusStub(true);
    }
}
Also used : ClusterStatusProtos(org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos) RegionServerReportRequest(org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerReportRequest) ServiceException(org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException) ServerName(org.apache.hadoop.hbase.ServerName) RegionServerStatusService(org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStatusService) YouAreDeadException(org.apache.hadoop.hbase.YouAreDeadException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 2 with YouAreDeadException

use of org.apache.hadoop.hbase.YouAreDeadException in project hbase by apache.

the class TestAssignmentManagerBase method doCrash.

/**
 * Simulates the crash of {@code serverName} and then registers a brand-new server with the
 * master.
 * <p>
 * A replacement server is reported on every call so that a test never ends up killing all
 * the region servers, which may hang the UTs. Each replacement gets its own port,
 * {@code 10000 + newRsAdded}, and the counter is advanced afterwards.
 *
 * @param serverName the server to move to the dead list and submit a crash for
 */
protected void doCrash(final ServerName serverName) {
    this.master.getServerManager().moveFromOnlineToDeadServers(serverName);
    // Both flags are false; the original note on them reads "No WALs here".
    this.am.submitServerCrash(serverName, false, false);

    // Add a new server to avoid killing all the region servers, which may hang the UTs.
    final int replacementPort = 10000 + newRsAdded;
    final ServerName replacement = ServerName.valueOf("localhost", replacementPort, 1);
    newRsAdded++;
    try {
        this.master.getServerManager().regionServerReport(
            replacement,
            ServerMetricsBuilder.newBuilder(replacement)
                .setLastReportTimestamp(EnvironmentEdgeManager.currentTime())
                .build());
    } catch (YouAreDeadException unexpected) {
        // Should not happen for a server that was only just created.
        throw new UncheckedIOException(unexpected);
    }
}
Also used : ServerName(org.apache.hadoop.hbase.ServerName) YouAreDeadException(org.apache.hadoop.hbase.YouAreDeadException) UncheckedIOException(java.io.UncheckedIOException)

Example 3 with YouAreDeadException

use of org.apache.hadoop.hbase.YouAreDeadException in project hbase by apache.

the class HRegionServer method run.

/**
 * The HRegionServer sticks in this loop until closed.
 * <p>
 * In order, this method:
 * <ol>
 * <li>returns immediately if the server is already stopped;</li>
 * <li>runs {@code preRegistrationInitialization()}, aborting on any failure;</li>
 * <li>calls {@code reportForDuty()} repeatedly, sleeping with backoff between attempts, until
 * it returns a response or {@code keepLooping()} turns false;</li>
 * <li>starts the procedure and quota managers if the server is still running and healthy;</li>
 * <li>runs the main loop, which sends a report through {@code tryRegionServerReport} at least
 * every {@code msgInterval} and drives region closing once the cluster is no longer up;</li>
 * <li>tears the server down: caches, worker threads, regions, WAL, RPC and ZooKeeper.</li>
 * </ol>
 * Any {@link Throwable} thrown by steps 3-5 (including {@code YouAreDeadException}) leads to
 * {@code abort(...)} unless {@code rpcServices.checkOOME(t)} returns true. The teardown in
 * step 6 runs in either case because it sits after the try/catch.
 */
@Override
public void run() {
    if (isStopped()) {
        LOG.info("Skipping run; stopped");
        return;
    }
    try {
        // Do pre-registration initializations; zookeeper, lease threads, etc.
        preRegistrationInitialization();
    } catch (Throwable e) {
        // Note there is no return here: execution falls through to the block below, whose
        // isStopped()/isAborted()/isHealthy() checks decide what is skipped.
        abort("Fatal exception during initialization", e);
    }
    try {
        if (!isStopped() && !isAborted()) {
            ShutdownHook.install(conf, dataFs, this, Thread.currentThread());
            // Initialize the RegionServerCoprocessorHost now that our ephemeral
            // node was created, in case any coprocessors want to use ZooKeeper
            this.rsHost = new RegionServerCoprocessorHost(this, this.conf);
            // Try and register with the Master; tell it we are here.  Break if server is stopped or
            // the clusterup flag is down or hdfs went wacky. Once registered successfully, go ahead and
            // start up all Services. Use RetryCounter to get backoff in case Master is struggling to
            // come up.
            LOG.debug("About to register with Master.");
            // NOTE(review): the arguments look like (max attempts, base sleep, max sleep), i.e.
            // effectively unlimited retries capped at 5 minutes -- confirm against
            // RetryCounterFactory.
            RetryCounterFactory rcf = new RetryCounterFactory(Integer.MAX_VALUE, this.sleeper.getPeriod(), 1000 * 60 * 5);
            RetryCounter rc = rcf.create();
            while (keepLooping()) {
                RegionServerStartupResponse w = reportForDuty();
                if (w == null) {
                    // No response from reportForDuty(): back off, then try again.
                    long sleepTime = rc.getBackoffTimeAndIncrementAttempts();
                    LOG.warn("reportForDuty failed; sleeping {} ms and then retrying.", sleepTime);
                    this.sleeper.sleep(sleepTime);
                } else {
                    // Got a response: process it and leave the registration loop.
                    handleReportForDutyResponse(w);
                    break;
                }
            }
        }
        if (!isStopped() && isHealthy()) {
            // Start the snapshot and other procedure handlers (the counterpart of the
            // rspmHost.stop(...) call in the shutdown sequence below), since the server is
            // ready to run
            if (this.rspmHost != null) {
                this.rspmHost.start();
            }
            // Start the Quota Manager
            if (this.rsQuotaManager != null) {
                rsQuotaManager.start(getRpcServer().getScheduler());
            }
            if (this.rsSpaceQuotaManager != null) {
                this.rsSpaceQuotaManager.start();
            }
        }
        // We registered with the Master.  Go into run mode.
        // lastMsg: time of the last report attempt; also passed as the start of the next
        // report interval.
        long lastMsg = EnvironmentEdgeManager.currentTime();
        // Write request count seen on the previous "only catalog regions left" iteration;
        // -1 means no sample has been taken yet.
        long oldRequestCount = -1;
        // The main run loop.
        while (!isStopped() && isHealthy()) {
            if (!isClusterUp()) {
                if (onlineRegions.isEmpty()) {
                    stop("Exiting; cluster shutdown set and not carrying any regions");
                } else if (!this.stopping) {
                    // First iteration after the cluster went down: start closing user regions.
                    this.stopping = true;
                    LOG.info("Closing user regions");
                    closeUserRegions(isAborted());
                } else {
                    boolean allUserRegionsOffline = areAllUserRegionsOffline();
                    if (allUserRegionsOffline) {
                        // Stop only once the write request count is unchanged between two
                        // consecutive iterations; otherwise remember it and check again.
                        // meta regions will be closed on our way out.
                        if (oldRequestCount == getWriteRequestCount()) {
                            stop("Stopped; only catalog regions remaining online");
                            break;
                        }
                        oldRequestCount = getWriteRequestCount();
                    } else {
                        // Make sure all regions have been closed -- some regions may
                        // have not got it because we were splitting at the time of
                        // the call to closeUserRegions.
                        closeUserRegions(this.abortRequested.get());
                    }
                    LOG.debug("Waiting on " + getOnlineRegionsAsPrintableString());
                }
            }
            long now = EnvironmentEdgeManager.currentTime();
            // Report once at least msgInterval has elapsed since the previous attempt.
            if ((now - lastMsg) >= msgInterval) {
                tryRegionServerReport(lastMsg, now);
                // Re-read the clock rather than reusing 'now', so time spent inside the
                // report call is not counted towards the next interval.
                lastMsg = EnvironmentEdgeManager.currentTime();
            }
            if (!isStopped() && !isAborted()) {
                this.sleeper.sleep();
            }
        }
    // end of the registration + main-loop try block
    } catch (Throwable t) {
        if (!rpcServices.checkOOME(t)) {
            // A YouAreDeadException is reported with its own message only; anything else
            // is flagged as unhandled.
            String prefix = t instanceof YouAreDeadException ? "" : "Unhandled: ";
            abort(prefix + t.getMessage(), t);
        }
    }
    // ---- Shutdown sequence. Reached on normal stop, on abort, and after the catch above. ----
    if (this.leaseManager != null) {
        this.leaseManager.closeAfterLeasesExpire();
    }
    if (this.splitLogWorker != null) {
        splitLogWorker.stop();
    }
    stopInfoServer();
    // Send cache a shutdown.
    if (blockCache != null) {
        blockCache.shutdown();
    }
    if (mobFileCache != null) {
        mobFileCache.shutdown();
    }
    // TODO: Should we check they are alive? If OOME could have exited already
    if (this.hMemManager != null) {
        this.hMemManager.stop();
    }
    if (this.cacheFlusher != null) {
        this.cacheFlusher.interruptIfNecessary();
    }
    if (this.compactSplitThread != null) {
        this.compactSplitThread.interruptIfNecessary();
    }
    // Stop the snapshot and other procedure handlers, forcefully killing all running tasks
    if (rspmHost != null) {
        rspmHost.stop(this.abortRequested.get() || this.killed);
    }
    // Close user regions according to how we are going down: killed, aborting or stopping.
    if (this.killed) {
    // Just skip out w/o closing regions.  Used when testing.
    } else if (abortRequested.get()) {
        if (this.dataFsOk) {
            // Don't leave any open file handles
            closeUserRegions(abortRequested.get());
        }
        LOG.info("aborting server " + this.serverName);
    } else {
        closeUserRegions(abortRequested.get());
        LOG.info("stopping server " + this.serverName);
    }
    // NOTE(review): unlike its neighbours this field is dereferenced without a null check --
    // confirm it is always set by the time run() gets here, including after a failed
    // preRegistrationInitialization().
    regionReplicationBufferManager.stop();
    closeClusterConnection();
    // Closing the compactSplit thread before closing meta regions
    if (!this.killed && containsMetaTableRegions()) {
        // Skipped only when aborting with dataFsOk false.
        if (!abortRequested.get() || this.dataFsOk) {
            if (this.compactSplitThread != null) {
                this.compactSplitThread.join();
                this.compactSplitThread = null;
            }
            closeMetaTableRegions(abortRequested.get());
        }
    }
    if (!this.killed && this.dataFsOk) {
        waitOnAllRegionsToClose(abortRequested.get());
        LOG.info("stopping server " + this.serverName + "; all regions closed.");
    }
    // Stop the quota manager
    if (rsQuotaManager != null) {
        rsQuotaManager.stop();
    }
    if (rsSpaceQuotaManager != null) {
        rsSpaceQuotaManager.stop();
        rsSpaceQuotaManager = null;
    }
    // flag may be changed when closing regions throws exception.
    if (this.dataFsOk) {
        // The argument is true for a regular stop and false when aborting.
        shutdownWAL(!abortRequested.get());
    }
    // Make sure the proxy is down.
    if (this.rssStub != null) {
        this.rssStub = null;
    }
    if (this.lockStub != null) {
        this.lockStub = null;
    }
    if (this.rpcClient != null) {
        this.rpcClient.close();
    }
    if (this.leaseManager != null) {
        this.leaseManager.close();
    }
    if (this.pauseMonitor != null) {
        this.pauseMonitor.stop();
    }
    if (!killed) {
        stopServiceThreads();
    }
    if (this.rpcServices != null) {
        this.rpcServices.stop();
    }
    try {
        deleteMyEphemeralNode();
    } catch (KeeperException.NoNodeException nn) {
    // pass -- the node is already gone, which is the outcome we wanted
    } catch (KeeperException e) {
        LOG.warn("Failed deleting my ephemeral node", e);
    }
    // We may have failed to delete the znode at the previous step, but
    // we delete the file anyway: a second attempt to delete the znode is likely to fail again.
    ZNodeClearer.deleteMyEphemeralNodeOnDisk();
    closeZooKeeper();
    LOG.info("Exiting; stopping=" + this.serverName + "; zookeeper connection closed.");
}
Also used : RetryCounterFactory(org.apache.hadoop.hbase.util.RetryCounterFactory) RetryCounter(org.apache.hadoop.hbase.util.RetryCounter) RegionServerStartupResponse(org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupResponse) YouAreDeadException(org.apache.hadoop.hbase.YouAreDeadException) KeeperException(org.apache.zookeeper.KeeperException)

Example 4 with YouAreDeadException

use of org.apache.hadoop.hbase.YouAreDeadException in project hbase by apache.

the class ServerManager method checkIsDead.

/**
 * Called when a RegionServer first reports in for duty, and on every heartbeat after that,
 * to make sure it has not been marked as dead.
 * <p>
 * A server that is on the dead list is rejected with a {@link YouAreDeadException}. A server
 * that was dead but came back with a new start code has its old entry removed from the dead
 * list; that cleanup only runs when there is no master reference or the master has finished
 * initializing.
 *
 * @param serverName the server that is checking in
 * @param what START or REPORT; only used in the log and exception messages
 * @throws YouAreDeadException if {@code serverName} is currently on the dead servers list
 */
private void checkIsDead(final ServerName serverName, final String what) throws YouAreDeadException {
    if (!this.deadservers.isDeadServer(serverName)) {
        // Not an exact match for a dead server. Once the master is initialized (or absent),
        // clear out any previous instance recorded for this server; see HBASE-5916 for
        // more information.
        // NOTE(review): presumably "previous instance" means same host name and port with an
        // older start code -- confirm against cleanPreviousInstance.
        final boolean masterReady = this.master == null || this.master.isInitialized();
        if (masterReady && this.deadservers.cleanPreviousInstance(serverName)) {
            // This server has now become alive after we marked it as dead.
            // Its previous entry was removed from the dead list to reflect that.
            LOG.debug("{} {} came back up, removed it from the dead servers list", what, serverName);
        }
        return;
    }
    // Exact match: host name, port and start code all match one of the existing dead
    // servers, so this server must be dead. Tell it to kill itself.
    final String rejection = "Server " + what + " rejected; currently processing " + serverName + " as dead server";
    LOG.debug(rejection);
    throw new YouAreDeadException(rejection);
}
Also used : YouAreDeadException(org.apache.hadoop.hbase.YouAreDeadException) ByteString(org.apache.hbase.thirdparty.com.google.protobuf.ByteString)

Example 5 with YouAreDeadException

use of org.apache.hadoop.hbase.YouAreDeadException in project hbase by apache.

the class TestRogueRSAssignment method testReportRSWithWrongRegion.

/**
 * Sends the master a report from a made-up region server that claims a region assigned to a
 * different server, and expects the call to fail with a {@code ServiceException} whose cause
 * is a {@link YouAreDeadException}.
 * <p>
 * Ignore this test, see HBASE-21421
 */
@Test
@Ignore
public void testReportRSWithWrongRegion() throws Exception {
    final TableName table = TableName.valueOf(this.name.getMethodName());
    final List<RegionInfo> regions = createTable(table);

    // Build the reporting server's name, then round-trip it through its versioned bytes.
    final ServerName seed = ServerName.valueOf("1.example.org", 1, EnvironmentEdgeManager.currentTime());
    final ServerName rogue = ServerName.parseVersionedServerName(seed.getVersionedBytes());

    // Make a fake request with a region assigned to a different RS.
    final RegionServerStatusProtos.RegionServerReportRequest.Builder fakeReport =
        makeRSReportRequestWithRegions(rogue, regions.get(1));

    // Register the expectations before sending the fake request to the master.
    // TODO: replace YouAreDeadException with appropriate exception as and when necessary
    exception.expect(ServiceException.class);
    exception.expectCause(isA(YouAreDeadException.class));

    // The response is never inspected; the call is expected to throw.
    master.getMasterRpcServices().regionServerReport(null, fakeReport.build());
}
Also used : RegionServerStatusProtos(org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos) TableName(org.apache.hadoop.hbase.TableName) ServerName(org.apache.hadoop.hbase.ServerName) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) YouAreDeadException(org.apache.hadoop.hbase.YouAreDeadException) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

YouAreDeadException (org.apache.hadoop.hbase.YouAreDeadException)5 ServerName (org.apache.hadoop.hbase.ServerName)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 IOException (java.io.IOException)1 InterruptedIOException (java.io.InterruptedIOException)1 UncheckedIOException (java.io.UncheckedIOException)1 TableName (org.apache.hadoop.hbase.TableName)1 RegionInfo (org.apache.hadoop.hbase.client.RegionInfo)1 ServiceException (org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException)1 ClusterStatusProtos (org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos)1 RegionServerStatusProtos (org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos)1 RegionServerReportRequest (org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerReportRequest)1 RegionServerStartupResponse (org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupResponse)1 RegionServerStatusService (org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStatusService)1 RetryCounter (org.apache.hadoop.hbase.util.RetryCounter)1 RetryCounterFactory (org.apache.hadoop.hbase.util.RetryCounterFactory)1 ByteString (org.apache.hbase.thirdparty.com.google.protobuf.ByteString)1 KeeperException (org.apache.zookeeper.KeeperException)1 Ignore (org.junit.Ignore)1 Test (org.junit.Test)1