Search in sources :

Example 1 with ManagerMessage

use of org.apache.accumulo.tserver.managermessage.ManagerMessage in project accumulo by apache.

the class TabletServer method run.

/**
 * Main loop of the tablet server.
 *
 * <p>In order, this method:
 * <ol>
 * <li>logs in, prepares ZooKeeper for replication and seeds the authentication key watcher;</li>
 * <li>starts the tablet client service, announces this server and initializes metrics;</li>
 * <li>starts the compaction manager, the WAL marker, the bulk-failed-copy work queue and the
 * recovery log watcher;</li>
 * <li>schedules the periodic background tasks (replication start-up check, tablet metadata
 * consistency check, bulk import cache cleanup);</li>
 * <li>forwards queued {@code ManagerMessage}s to the manager until a stop is requested;</li>
 * <li>waits for {@code shutdownComplete}, then stops the replication and Thrift servers, closes
 * the volume manager and releases the tablet server lock.</li>
 * </ol>
 *
 * @throws RuntimeException if the client service, the WAL marker, the distributed work queue or
 *         the recovery log watcher cannot be started
 */
@Override
public void run() {
    SecurityUtil.serverLogin(getConfiguration());
    // To make things easier on users/devs, and to avoid creating an upgrade path to 1.7
    // We can just make the zookeeper paths before we try to use.
    initializeZkForReplication();
    if (authKeyWatcher != null) {
        log.info("Seeding ZooKeeper watcher for authentication keys");
        try {
            authKeyWatcher.updateAuthKeys();
        } catch (KeeperException | InterruptedException e) {
            // TODO Does there need to be a better check? What are the error conditions that we'd fall
            // out here? AUTH_FAILURE?
            // If we get the error, do we just put it on a timer and retry the exists(String, Watcher)
            // call?
            log.error("Failed to perform initial check for authentication tokens in ZooKeeper. Delegation token authentication will be unavailable.", e);
        }
    }
    try {
        clientAddress = startTabletClientService();
    } catch (UnknownHostException e1) {
        throw new RuntimeException("Failed to start the tablet client service", e1);
    }
    announceExistence();
    try {
        MetricsUtil.initializeMetrics(context.getConfiguration(), this.applicationName, clientAddress);
    } catch (Exception e1) {
        // Metrics are best-effort: the server keeps starting without them.
        log.error("Error initializing metrics, metrics will not be emitted.", e1);
    }
    metrics = new TabletServerMetrics(this);
    updateMetrics = new TabletServerUpdateMetrics();
    scanMetrics = new TabletServerScanMetrics();
    mincMetrics = new TabletServerMinCMetrics();
    ceMetrics = new CompactionExecutorsMetrics();
    MetricsUtil.initializeProducers(metrics, updateMetrics, scanMetrics, mincMetrics, ceMetrics);
    // Each iteration takes a fresh snapshot of the online tablets and views them as Compactables.
    this.compactionManager = new CompactionManager(
        () -> Iterators.transform(onlineTablets.snapshot().values().iterator(), Tablet::asCompactable),
        getContext(), ceMetrics);
    compactionManager.start();
    try {
        walMarker.initWalMarker(getTabletSession());
    } catch (Exception e) {
        log.error("Unable to create WAL marker node in zookeeper", e);
        throw new RuntimeException(e);
    }
    ThreadPoolExecutor distWorkQThreadPool = ThreadPools.createExecutorService(getConfiguration(), Property.TSERV_WORKQ_THREADS, true);
    bulkFailedCopyQ = new DistributedWorkQueue(getContext().getZooKeeperRoot() + Constants.ZBULK_FAILED_COPYQ, getConfiguration(), getContext());
    try {
        bulkFailedCopyQ.startProcessing(new BulkFailedCopyProcessor(getContext()), distWorkQThreadPool);
    } catch (Exception e1) {
        throw new RuntimeException("Failed to start distributed work queue for copying ", e1);
    }
    try {
        logSorter.startWatchingForRecoveryLogs(distWorkQThreadPool);
    } catch (Exception ex) {
        // Log the cause as well, consistent with the WAL marker failure handling above.
        log.error("Error setting watches for recoveries", ex);
        throw new RuntimeException(ex);
    }
    final AccumuloConfiguration aconf = getConfiguration();
    // if the replication name is ever set, then start replication services
    @SuppressWarnings("deprecation") Property p = Property.REPLICATION_NAME;
    context.getScheduledExecutor().scheduleWithFixedDelay(() -> {
        if (this.replServer == null) {
            if (!getConfiguration().get(p).isEmpty()) {
                log.info(p.getKey() + " was set, starting repl services.");
                setupReplication(aconf);
            }
        }
    }, 0, 5, TimeUnit.SECONDS);
    // random 30-60 minute delay
    int tabletCheckFrequency = 30 + random.nextInt(31);
    // Periodically check that metadata of tablets matches what is held in memory
    ThreadPools.createGeneralScheduledExecutorService(aconf).scheduleWithFixedDelay(() -> {
        // A task scheduled with scheduleWithFixedDelay is never run again once one execution
        // throws, so failures are caught and logged here to keep the periodic check alive.
        try {
            final SortedMap<KeyExtent, Tablet> onlineTabletsSnapshot = onlineTablets.snapshot();
            Map<KeyExtent, Long> updateCounts = new HashMap<>();
            // gather updateCounts for each tablet
            onlineTabletsSnapshot.forEach((ke, tablet) -> updateCounts.put(ke, tablet.getUpdateCount()));
            // gather metadata for all tablets readTablets()
            try (TabletsMetadata tabletsMetadata = getContext().getAmple().readTablets().forTablets(onlineTabletsSnapshot.keySet()).fetch(FILES, LOGS, ECOMP, PREV_ROW).build()) {
                // for each tablet, compare its metadata to what is held in memory
                tabletsMetadata.forEach(tabletMetadata -> {
                    KeyExtent extent = tabletMetadata.getExtent();
                    Tablet tablet = onlineTabletsSnapshot.get(extent);
                    if (tablet == null) {
                        // Metadata for an extent that is not in the snapshot: nothing to compare.
                        return;
                    }
                    Long counter = updateCounts.get(extent);
                    tablet.compareTabletInfo(counter, tabletMetadata);
                });
            }
        } catch (RuntimeException e) {
            log.error("Error comparing online tablets to their metadata, will retry at the next scheduled check", e);
        }
    }, tabletCheckFrequency, tabletCheckFrequency, TimeUnit.MINUTES);
    final long cleanupBulkLoadedCacheMillis = TimeUnit.MINUTES.toMillis(15);
    context.getScheduledExecutor().scheduleWithFixedDelay(new BulkImportCacheCleaner(this), cleanupBulkLoadedCacheMillis, cleanupBulkLoadedCacheMillis, TimeUnit.MILLISECONDS);
    while (!serverStopRequested) {
        // send all of the pending messages
        try {
            ManagerMessage mm = null;
            ManagerClientService.Client iface = null;
            try {
                // wait until a message is available to send, or until a server stop
                // was requested
                while (mm == null && !serverStopRequested) {
                    mm = managerMessages.poll(1, TimeUnit.SECONDS);
                }
                // have a message to send to the manager, so grab a
                // connection
                HostAndPort managerHost = getManagerAddress();
                iface = managerConnection(managerHost);
                TServiceClient client = iface;
                // if the loop below does not execute at all while mm != null,
                // then finally block should place mm back on queue
                while (!serverStopRequested && mm != null && client != null && client.getOutputProtocol() != null && client.getOutputProtocol().getTransport() != null && client.getOutputProtocol().getTransport().isOpen()) {
                    try {
                        mm.send(getContext().rpcCreds(), getClientAddressString(), iface);
                        mm = null;
                    } catch (TException ex) {
                        log.warn("Error sending message: queuing message again");
                        managerMessages.putFirst(mm);
                        // cleared so the finally block does not queue the same message twice
                        mm = null;
                        throw ex;
                    }
                    // if any messages are immediately available grab em and
                    // send them
                    mm = managerMessages.poll();
                }
            } finally {
                if (mm != null) {
                    managerMessages.putFirst(mm);
                }
                returnManagerConnection(iface);
                sleepUninterruptibly(1, TimeUnit.SECONDS);
            }
        } catch (InterruptedException e) {
            // The interrupt is treated as a stop request; the flag is deliberately not restored
            // because the shutdown wait below would otherwise be interrupted immediately.
            log.info("Interrupt Exception received, shutting down");
            serverStopRequested = true;
        } catch (Exception e) {
            // may have lost connection with manager
            // loop back to the beginning and wait for a new one
            // this way we survive manager failures
            log.error(getClientAddressString() + ": TServerInfo: Exception. Manager down?", e);
        }
    }
    // Block here until shutdownComplete is set (it is not set in this method), re-checking at
    // least once per second, before tearing the services down.
    synchronized (this) {
        while (!shutdownComplete) {
            try {
                this.wait(1000);
            } catch (InterruptedException e) {
                // keep waiting for shutdown to complete, but record where the interrupt came from
                log.error(e.toString(), e);
            }
        }
    }
    log.debug("Stopping Replication Server");
    if (this.replServer != null) {
        this.replServer.stop();
    }
    log.debug("Stopping Thrift Servers");
    if (server != null) {
        server.stop();
    }
    try {
        log.debug("Closing filesystems");
        getVolumeManager().close();
    } catch (IOException e) {
        log.warn("Failed to close filesystem : {}", e.getMessage(), e);
    }
    gcLogger.logGCInfo(getConfiguration());
    log.info("TServerInfo: stop requested. exiting ... ");
    try {
        tabletServerLock.unlock();
    } catch (Exception e) {
        log.warn("Failed to release tablet server lock", e);
    }
}
Also used : TException(org.apache.thrift.TException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) Compactable(org.apache.accumulo.tserver.compactions.Compactable) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TabletServerMinCMetrics(org.apache.accumulo.tserver.metrics.TabletServerMinCMetrics) DistributedWorkQueue(org.apache.accumulo.server.zookeeper.DistributedWorkQueue) TServiceClient(org.apache.thrift.TServiceClient) HostAndPort(org.apache.accumulo.core.util.HostAndPort) TabletServerMetrics(org.apache.accumulo.tserver.metrics.TabletServerMetrics) TabletsMetadata(org.apache.accumulo.core.metadata.schema.TabletsMetadata) Tablet(org.apache.accumulo.tserver.tablet.Tablet) Property(org.apache.accumulo.core.conf.Property) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) ManagerClientService(org.apache.accumulo.core.manager.thrift.ManagerClientService) CompactionExecutorsMetrics(org.apache.accumulo.tserver.metrics.CompactionExecutorsMetrics) UnknownHostException(java.net.UnknownHostException) TabletServerUpdateMetrics(org.apache.accumulo.tserver.metrics.TabletServerUpdateMetrics) IOException(java.io.IOException) WalMarkerException(org.apache.accumulo.server.log.WalStateManager.WalMarkerException) TException(org.apache.thrift.TException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) KeeperException(org.apache.zookeeper.KeeperException) BulkImportCacheCleaner(org.apache.accumulo.tserver.tablet.BulkImportCacheCleaner) ManagerMessage(org.apache.accumulo.tserver.managermessage.ManagerMessage) CompactionManager(org.apache.accumulo.tserver.compactions.CompactionManager) AtomicLong(java.util.concurrent.atomic.AtomicLong) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) KeeperException(org.apache.zookeeper.KeeperException) TabletServerScanMetrics(org.apache.accumulo.tserver.metrics.TabletServerScanMetrics)

Aggregations

IOException (java.io.IOException)1 UnknownHostException (java.net.UnknownHostException)1 HashMap (java.util.HashMap)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)1 Property (org.apache.accumulo.core.conf.Property)1 KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent)1 ManagerClientService (org.apache.accumulo.core.manager.thrift.ManagerClientService)1 TabletsMetadata (org.apache.accumulo.core.metadata.schema.TabletsMetadata)1 HostAndPort (org.apache.accumulo.core.util.HostAndPort)1 WalMarkerException (org.apache.accumulo.server.log.WalStateManager.WalMarkerException)1 DistributedWorkQueue (org.apache.accumulo.server.zookeeper.DistributedWorkQueue)1 Compactable (org.apache.accumulo.tserver.compactions.Compactable)1 CompactionManager (org.apache.accumulo.tserver.compactions.CompactionManager)1 ManagerMessage (org.apache.accumulo.tserver.managermessage.ManagerMessage)1 CompactionExecutorsMetrics (org.apache.accumulo.tserver.metrics.CompactionExecutorsMetrics)1 TabletServerMetrics (org.apache.accumulo.tserver.metrics.TabletServerMetrics)1 TabletServerMinCMetrics (org.apache.accumulo.tserver.metrics.TabletServerMinCMetrics)1