Search in sources :

Example 1 with MasterMessage

Use of org.apache.accumulo.tserver.mastermessage.MasterMessage in the accumulo project by Apache.

The method run of the class TabletServer.

/**
 * Main loop of the tablet server.
 *
 * <p>The method runs in three phases:
 * <ol>
 * <li>Startup: logs in, ensures ZooKeeper is initialized, registers metrics, seeds the
 * authentication key watcher (when present), starts the tablet client service, announces
 * existence, initializes the WAL marker, starts the distributed work queues and the
 * replication service, and schedules periodic maintenance tasks.</li>
 * <li>Message loop: until {@code serverStopRequested} is set, takes messages from
 * {@code masterMessages} and sends them to the master, putting a message back at the head of
 * the queue when it could not be sent.</li>
 * <li>Shutdown: waits for {@code shutdownComplete}, stops the replication and thrift servers,
 * closes the filesystem and releases the tablet server lock.</li>
 * </ol>
 *
 * @throws RuntimeException if ZooKeeper initialization, the tablet client service, the WAL
 *         marker, either distributed work queue consumer, or the replication service fails to
 *         start
 */
@Override
public void run() {
    SecurityUtil.serverLogin(SiteConfiguration.getInstance());
    // We can just make the zookeeper paths before we try to use.
    try {
        ZooKeeperInitialization.ensureZooKeeperInitialized(ZooReaderWriter.getInstance(), ZooUtil.getRoot(getInstance()));
    } catch (KeeperException | InterruptedException e) {
        log.error("Could not ensure that ZooKeeper is properly initialized", e);
        throw new RuntimeException(e);
    }
    Metrics tserverMetrics = metricsFactory.createTabletServerMetrics(this);
    // Register MBeans; a registration failure is logged but does not abort startup
    try {
        tserverMetrics.register();
        mincMetrics.register();
        scanMetrics.register();
        updateMetrics.register();
    } catch (Exception e) {
        log.error("Error registering with JMX", e);
    }
    // The watcher may be null, in which case no authentication keys are seeded
    if (null != authKeyWatcher) {
        log.info("Seeding ZooKeeper watcher for authentication keys");
        try {
            authKeyWatcher.updateAuthKeys();
        } catch (KeeperException | InterruptedException e) {
            // TODO Does there need to be a better check? What are the error conditions that we'd fall out here? AUTH_FAILURE?
            // If we get the error, do we just put it on a timer and retry the exists(String, Watcher) call?
            log.error("Failed to perform initial check for authentication tokens in ZooKeeper. Delegation token authentication will be unavailable.", e);
        }
    }
    // Start the service that listens for client requests and remember its address
    try {
        clientAddress = startTabletClientService();
    } catch (UnknownHostException e1) {
        throw new RuntimeException("Failed to start the tablet client service", e1);
    }
    announceExistence();
    try {
        walMarker.initWalMarker(getTabletSession());
    } catch (Exception e) {
        log.error("Unable to create WAL marker node in zookeeper", e);
        throw new RuntimeException(e);
    }
    // One thread pool is shared by the bulk-failed-copy queue and the recovery log watcher
    ThreadPoolExecutor distWorkQThreadPool = new SimpleThreadPool(getConfiguration().getCount(Property.TSERV_WORKQ_THREADS), "distributed work queue");
    bulkFailedCopyQ = new DistributedWorkQueue(ZooUtil.getRoot(getInstance()) + Constants.ZBULK_FAILED_COPYQ, getConfiguration());
    try {
        bulkFailedCopyQ.startProcessing(new BulkFailedCopyProcessor(), distWorkQThreadPool);
    } catch (Exception e1) {
        throw new RuntimeException("Failed to start distributed work queue for copying ", e1);
    }
    try {
        logSorter.startWatchingForRecoveryLogs(distWorkQThreadPool);
    } catch (Exception ex) {
        // NOTE(review): unlike the other startup failures, the exception is not passed to the
        // logger here; it is only carried by the RuntimeException thrown below.
        log.error("Error setting watches for recoveries");
        throw new RuntimeException(ex);
    }
    // Start the thrift service listening for incoming replication requests
    try {
        replicationAddress = startReplicationService();
    } catch (UnknownHostException e) {
        throw new RuntimeException("Failed to start replication service", e);
    }
    // Start the pool to handle outgoing replications
    final ThreadPoolExecutor replicationThreadPool = new SimpleThreadPool(getConfiguration().getCount(Property.REPLICATION_WORKER_THREADS), "replication task");
    replWorker.setExecutor(replicationThreadPool);
    replWorker.run();
    // Periodically check the configured size of the replication pool and, if it changed, resize the pool.
    // NOTE(review): the original comment said "every 5 seconds", but the task is scheduled below with
    // the arguments 10000 and 30000 (presumably initial delay and period in milliseconds) - confirm
    // against SimpleTimer.schedule.
    final AccumuloConfiguration aconf = getConfiguration();
    Runnable replicationWorkThreadPoolResizer = new Runnable() {

        @Override
        public void run() {
            // Re-read the property on every run so configuration changes are picked up
            int maxPoolSize = aconf.getCount(Property.REPLICATION_WORKER_THREADS);
            if (replicationThreadPool.getMaximumPoolSize() != maxPoolSize) {
                log.info("Resizing thread pool for sending replication work from {} to {}", replicationThreadPool.getMaximumPoolSize(), maxPoolSize);
                replicationThreadPool.setMaximumPoolSize(maxPoolSize);
            }
        }
    };
    SimpleTimer.getInstance(aconf).schedule(replicationWorkThreadPoolResizer, 10000, 30000);
    // 15 minutes, used for both schedule arguments of the bulk import cache cleaner
    final long CLEANUP_BULK_LOADED_CACHE_MILLIS = 15 * 60 * 1000;
    SimpleTimer.getInstance(aconf).schedule(new BulkImportCacheCleaner(this), CLEANUP_BULK_LOADED_CACHE_MILLIS, CLEANUP_BULK_LOADED_CACHE_MILLIS);
    HostAndPort masterHost;
    while (!serverStopRequested) {
        // send all of the pending messages
        try {
            MasterMessage mm = null;
            MasterClientService.Client iface = null;
            try {
                // wait until a message is available to send, or a server stop
                // was requested
                while (mm == null && !serverStopRequested) {
                    mm = masterMessages.poll(1000, TimeUnit.MILLISECONDS);
                }
                // have a message to send to the master, so grab a
                // connection
                masterHost = getMasterAddress();
                iface = masterConnection(masterHost);
                TServiceClient client = iface;
                // if the while loop below does not execute at all and mm != null,
                // then finally block should place mm back on queue
                while (!serverStopRequested && mm != null && client != null && client.getOutputProtocol() != null && client.getOutputProtocol().getTransport() != null && client.getOutputProtocol().getTransport().isOpen()) {
                    try {
                        mm.send(rpcCreds(), getClientAddressString(), iface);
                        mm = null;
                    } catch (TException ex) {
                        log.warn("Error sending message: queuing message again");
                        masterMessages.putFirst(mm);
                        // clear mm so the finally block does not queue the same message twice
                        mm = null;
                        throw ex;
                    }
                    // if any messages are immediately available grab em and
                    // send them
                    mm = masterMessages.poll();
                }
            } finally {
                // a message that was taken but never sent goes back to the head of the queue
                if (mm != null) {
                    masterMessages.putFirst(mm);
                }
                returnMasterConnection(iface);
                // runs on every pass through the outer loop, whether or not sending succeeded
                sleepUninterruptibly(1, TimeUnit.SECONDS);
            }
        } catch (InterruptedException e) {
            // NOTE(review): the interrupt is treated as a stop request; the thread's interrupt
            // status is not restored here.
            log.info("Interrupt Exception received, shutting down");
            serverStopRequested = true;
        } catch (Exception e) {
            // may have lost connection with master
            // loop back to the beginning and wait for a new one
            // this way we survive master failures
            log.error(getClientAddressString() + ": TServerInfo: Exception. Master down?", e);
        }
    }
    // wait until shutdownComplete is set before tearing down the services below;
    // presumably so that objects still used by the shutdown do not
    // get prematurely finalized - TODO confirm
    synchronized (this) {
        while (!shutdownComplete) {
            try {
                this.wait(1000);
            } catch (InterruptedException e) {
                // an interrupt does not end the wait; the loop re-checks shutdownComplete
                log.error(e.toString());
            }
        }
    }
    log.debug("Stopping Replication Server");
    TServerUtils.stopTServer(this.replServer);
    log.debug("Stopping Thrift Servers");
    TServerUtils.stopTServer(server);
    try {
        log.debug("Closing filesystem");
        fs.close();
    } catch (IOException e) {
        log.warn("Failed to close filesystem : {}", e.getMessage(), e);
    }
    gcLogger.logGCInfo(getConfiguration());
    log.info("TServerInfo: stop requested. exiting ... ");
    // releasing the lock is the last step; a failure is logged and otherwise ignored
    try {
        tabletServerLock.unlock();
    } catch (Exception e) {
        log.warn("Failed to release tablet server lock", e);
    }
}
Also used : MasterMessage(org.apache.accumulo.tserver.mastermessage.MasterMessage) TException(org.apache.thrift.TException) UnknownHostException(java.net.UnknownHostException) IOException(java.io.IOException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) TSampleNotPresentException(org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException) WalMarkerException(org.apache.accumulo.server.log.WalStateManager.WalMarkerException) ConstraintViolationException(org.apache.accumulo.core.tabletserver.thrift.ConstraintViolationException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) ExecutionException(java.util.concurrent.ExecutionException) NotServingTabletException(org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) KeeperException(org.apache.zookeeper.KeeperException) NoSuchScanIDException(org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException) CancellationException(java.util.concurrent.CancellationException) DistributedStoreException(org.apache.accumulo.server.master.state.DistributedStoreException) TException(org.apache.thrift.TException) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ThriftTableOperationException(org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException) BadLocationStateException(org.apache.accumulo.server.master.state.TabletLocationState.BadLocationStateException) TimeoutException(java.util.concurrent.TimeoutException) TabletClosedException(org.apache.accumulo.tserver.tablet.TabletClosedException) 
SampleNotPresentException(org.apache.accumulo.core.client.SampleNotPresentException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) DistributedWorkQueue(org.apache.accumulo.server.zookeeper.DistributedWorkQueue) TServiceClient(org.apache.thrift.TServiceClient) HostAndPort(org.apache.accumulo.core.util.HostAndPort) TabletServerScanMetrics(org.apache.accumulo.tserver.metrics.TabletServerScanMetrics) Metrics(org.apache.accumulo.server.metrics.Metrics) TabletServerUpdateMetrics(org.apache.accumulo.tserver.metrics.TabletServerUpdateMetrics) BulkImportCacheCleaner(org.apache.accumulo.tserver.tablet.BulkImportCacheCleaner) LoggingRunnable(org.apache.accumulo.fate.util.LoggingRunnable) MasterClientService(org.apache.accumulo.core.master.thrift.MasterClientService) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) KeeperException(org.apache.zookeeper.KeeperException) SimpleThreadPool(org.apache.accumulo.core.util.SimpleThreadPool) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration)

Aggregations

IOException (java.io.IOException)1 UnknownHostException (java.net.UnknownHostException)1 CancellationException (java.util.concurrent.CancellationException)1 ExecutionException (java.util.concurrent.ExecutionException)1 ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor)1 TimeoutException (java.util.concurrent.TimeoutException)1 AccumuloException (org.apache.accumulo.core.client.AccumuloException)1 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)1 SampleNotPresentException (org.apache.accumulo.core.client.SampleNotPresentException)1 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)1 ThriftSecurityException (org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException)1 ThriftTableOperationException (org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException)1 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)1 IterationInterruptedException (org.apache.accumulo.core.iterators.IterationInterruptedException)1 MasterClientService (org.apache.accumulo.core.master.thrift.MasterClientService)1 ConstraintViolationException (org.apache.accumulo.core.tabletserver.thrift.ConstraintViolationException)1 NoSuchScanIDException (org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException)1 NotServingTabletException (org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException)1 TSampleNotPresentException (org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException)1 HostAndPort (org.apache.accumulo.core.util.HostAndPort)1