Search in sources :

Example 6 with RetryCounterFactory

use of org.apache.hadoop.hbase.util.RetryCounterFactory in project hbase by apache.

the class HRegionServer method run.

/**
 * The HRegionServer sticks in this loop until closed.
 *
 * <p>The method runs through four phases, in order:
 * <ol>
 *   <li>Pre-registration initialization; any {@link Throwable} raised there aborts the
 *       server.</li>
 *   <li>Registration with the Master: {@code reportForDuty()} is retried, sleeping for a
 *       back-off interval taken from a {@code RetryCounter}, for as long as
 *       {@code keepLooping()} returns true.</li>
 *   <li>The main run loop: periodically calls {@code tryRegionServerReport} and, once the
 *       cluster is no longer up, closes user regions and eventually stops.</li>
 *   <li>The shutdown sequence: stops the helper components, closes regions, shuts down the
 *       WAL, removes the ephemeral znode and closes the ZooKeeper connection.</li>
 * </ol>
 *
 * <p>The shutdown sequence is executed on every exit path, including the one taken when
 * initialization fails, which is why each component is checked for {@code null} before it
 * is stopped.
 */
@Override
public void run() {
    if (isStopped()) {
        LOG.info("Skipping run; stopped");
        return;
    }
    try {
        // Do pre-registration initializations; zookeeper, lease threads, etc.
        preRegistrationInitialization();
    } catch (Throwable e) {
        // No return here: execution falls through so the shutdown sequence below still runs.
        abort("Fatal exception during initialization", e);
    }
    try {
        if (!isStopped() && !isAborted()) {
            ShutdownHook.install(conf, dataFs, this, Thread.currentThread());
            // Initialize the RegionServerCoprocessorHost now that our ephemeral
            // node was created, in case any coprocessors want to use ZooKeeper
            this.rsHost = new RegionServerCoprocessorHost(this, this.conf);
            // Try and register with the Master; tell it we are here.  Break if server is stopped or
            // the clusterup flag is down or hdfs went wacky. Once registered successfully, go ahead and
            // start up all Services. Use RetryCounter to get backoff in case Master is struggling to
            // come up.
            LOG.debug("About to register with Master.");
            // The last argument is five minutes expressed in milliseconds.
            // NOTE(review): presumably the upper bound on a single back-off sleep -- confirm
            // against the RetryCounterFactory constructor.
            RetryCounterFactory rcf = new RetryCounterFactory(Integer.MAX_VALUE, this.sleeper.getPeriod(), 1000 * 60 * 5);
            RetryCounter rc = rcf.create();
            while (keepLooping()) {
                RegionServerStartupResponse w = reportForDuty();
                if (w == null) {
                    // A null response is treated as a failed registration attempt; back off and retry.
                    long sleepTime = rc.getBackoffTimeAndIncrementAttempts();
                    LOG.warn("reportForDuty failed; sleeping {} ms and then retrying.", sleepTime);
                    this.sleeper.sleep(sleepTime);
                } else {
                    handleReportForDutyResponse(w);
                    break;
                }
            }
        }
        if (!isStopped() && isHealthy()) {
            // since the server is ready to run
            if (this.rspmHost != null) {
                this.rspmHost.start();
            }
            // Start the Quota Manager
            if (this.rsQuotaManager != null) {
                rsQuotaManager.start(getRpcServer().getScheduler());
            }
            if (this.rsSpaceQuotaManager != null) {
                this.rsSpaceQuotaManager.start();
            }
        }
        // We registered with the Master.  Go into run mode.
        long lastMsg = EnvironmentEdgeManager.currentTime();
        // Write-request count seen on the previous pass once all user regions are offline;
        // -1 means no pass has recorded a value yet.
        long oldRequestCount = -1;
        // The main run loop.
        while (!isStopped() && isHealthy()) {
            if (!isClusterUp()) {
                if (onlineRegions.isEmpty()) {
                    stop("Exiting; cluster shutdown set and not carrying any regions");
                } else if (!this.stopping) {
                    // First pass after the cluster went down: start closing user regions.
                    this.stopping = true;
                    LOG.info("Closing user regions");
                    closeUserRegions(isAborted());
                } else {
                    boolean allUserRegionsOffline = areAllUserRegionsOffline();
                    if (allUserRegionsOffline) {
                        // meta regions will be closed on our way out.
                        // Stop only once the write-request count is unchanged between two
                        // consecutive passes.
                        if (oldRequestCount == getWriteRequestCount()) {
                            stop("Stopped; only catalog regions remaining online");
                            break;
                        }
                        oldRequestCount = getWriteRequestCount();
                    } else {
                        // Make sure all regions have been closed -- some regions may
                        // have not got it because we were splitting at the time of
                        // the call to closeUserRegions.
                        closeUserRegions(this.abortRequested.get());
                    }
                    LOG.debug("Waiting on " + getOnlineRegionsAsPrintableString());
                }
            }
            long now = EnvironmentEdgeManager.currentTime();
            if ((now - lastMsg) >= msgInterval) {
                tryRegionServerReport(lastMsg, now);
                // Re-read the clock rather than reusing 'now', so the time spent in the report
                // is not counted towards the next interval.
                lastMsg = EnvironmentEdgeManager.currentTime();
            }
            if (!isStopped() && !isAborted()) {
                this.sleeper.sleep();
            }
        }
    // end of the main run loop
    } catch (Throwable t) {
        // Abort unless checkOOME reports that it already dealt with the throwable.
        if (!rpcServices.checkOOME(t)) {
            String prefix = t instanceof YouAreDeadException ? "" : "Unhandled: ";
            abort(prefix + t.getMessage(), t);
        }
    }
    // ---- Shutdown sequence: reached on normal stop, on abort, and after failed initialization.
    if (this.leaseManager != null) {
        this.leaseManager.closeAfterLeasesExpire();
    }
    if (this.splitLogWorker != null) {
        splitLogWorker.stop();
    }
    stopInfoServer();
    // Send cache a shutdown.
    if (blockCache != null) {
        blockCache.shutdown();
    }
    if (mobFileCache != null) {
        mobFileCache.shutdown();
    }
    // TODO: Should we check they are alive? If OOME could have exited already
    if (this.hMemManager != null) {
        this.hMemManager.stop();
    }
    if (this.cacheFlusher != null) {
        this.cacheFlusher.interruptIfNecessary();
    }
    if (this.compactSplitThread != null) {
        this.compactSplitThread.interruptIfNecessary();
    }
    // Stop the snapshot and other procedure handlers, forcefully killing all running tasks
    if (rspmHost != null) {
        rspmHost.stop(this.abortRequested.get() || this.killed);
    }
    if (this.killed) {
    // Just skip out w/o closing regions.  Used when testing.
    } else if (abortRequested.get()) {
        if (this.dataFsOk) {
            // Don't leave any open file handles
            closeUserRegions(abortRequested.get());
        }
        LOG.info("aborting server " + this.serverName);
    } else {
        closeUserRegions(abortRequested.get());
        LOG.info("stopping server " + this.serverName);
    }
    // Null-guarded like every other component stopped in this sequence. An unguarded call
    // would throw NullPointerException here and skip all of the cleanup below (meta region
    // close, WAL shutdown, ephemeral znode deletion, ZooKeeper close).
    // NOTE(review): assumes this field can still be unset when initialization aborts early --
    // confirm where it is assigned.
    if (regionReplicationBufferManager != null) {
        regionReplicationBufferManager.stop();
    }
    closeClusterConnection();
    // Closing the compactSplit thread before closing meta regions
    if (!this.killed && containsMetaTableRegions()) {
        if (!abortRequested.get() || this.dataFsOk) {
            if (this.compactSplitThread != null) {
                this.compactSplitThread.join();
                this.compactSplitThread = null;
            }
            closeMetaTableRegions(abortRequested.get());
        }
    }
    if (!this.killed && this.dataFsOk) {
        waitOnAllRegionsToClose(abortRequested.get());
        LOG.info("stopping server " + this.serverName + "; all regions closed.");
    }
    // Stop the quota manager
    if (rsQuotaManager != null) {
        rsQuotaManager.stop();
    }
    if (rsSpaceQuotaManager != null) {
        rsSpaceQuotaManager.stop();
        rsSpaceQuotaManager = null;
    }
    // flag may be changed when closing regions throws exception.
    if (this.dataFsOk) {
        shutdownWAL(!abortRequested.get());
    }
    // Make sure the proxy is down.
    if (this.rssStub != null) {
        this.rssStub = null;
    }
    if (this.lockStub != null) {
        this.lockStub = null;
    }
    if (this.rpcClient != null) {
        this.rpcClient.close();
    }
    if (this.leaseManager != null) {
        this.leaseManager.close();
    }
    if (this.pauseMonitor != null) {
        this.pauseMonitor.stop();
    }
    if (!killed) {
        stopServiceThreads();
    }
    if (this.rpcServices != null) {
        this.rpcServices.stop();
    }
    try {
        deleteMyEphemeralNode();
    } catch (KeeperException.NoNodeException nn) {
        // The znode is already gone, which is the state we wanted; nothing to do.
    } catch (KeeperException e) {
        LOG.warn("Failed deleting my ephemeral node", e);
    }
    // We may have failed to delete the znode at the previous step, but
    // we delete the file anyway: a second attempt to delete the znode is likely to fail again.
    ZNodeClearer.deleteMyEphemeralNodeOnDisk();
    closeZooKeeper();
    LOG.info("Exiting; stopping=" + this.serverName + "; zookeeper connection closed.");
}
Also used : RetryCounterFactory(org.apache.hadoop.hbase.util.RetryCounterFactory) RetryCounter(org.apache.hadoop.hbase.util.RetryCounter) RegionServerStartupResponse(org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupResponse) YouAreDeadException(org.apache.hadoop.hbase.YouAreDeadException) KeeperException(org.apache.zookeeper.KeeperException)

Aggregations

RetryCounterFactory (org.apache.hadoop.hbase.util.RetryCounterFactory)6 IOException (java.io.IOException)3 RetryCounter (org.apache.hadoop.hbase.util.RetryCounter)3 InterruptedIOException (java.io.InterruptedIOException)2 TableNotFoundException (org.apache.hadoop.hbase.TableNotFoundException)2 RetryConfig (org.apache.hadoop.hbase.util.RetryCounter.RetryConfig)2 RestrictedApi (com.google.errorprone.annotations.RestrictedApi)1 Constructor (java.lang.reflect.Constructor)1 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 InetAddress (java.net.InetAddress)1 InetSocketAddress (java.net.InetSocketAddress)1 UnknownHostException (java.net.UnknownHostException)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 Comparator (java.util.Comparator)1 EnumSet (java.util.EnumSet)1 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1