Search in sources :

Example 1 with Threads

Use of org.apache.accumulo.core.util.threads.Threads in the accumulo project by apache.

From the class Manager, the method gatherTableInformation.

/**
 * Requests the status of every server in {@code currentServers} on a thread pool and returns an
 * immutable snapshot of the statuses collected before the wait deadline expired.
 *
 * <p>A server whose status request fails has its counter in {@code badServers} incremented. Once
 * that counter exceeds {@code MAX_BAD_STATUS_COUNT}, a halt of the server is attempted (rate
 * limited via {@code MAX_SHUTDOWNS_PER_SEC}) and the counter is removed. After gathering,
 * {@code badServers} is pruned to servers that are still current and did not report a status.
 *
 * <p>If the calling thread is interrupted while waiting for the pool to finish, the wait is
 * abandoned, the interrupt flag is restored, and whatever was gathered so far is returned.
 *
 * @param currentServers the servers to query
 * @param balancerMap output map; receives one entry per element of the {@code tserverStatus}
 *        field, converted with {@code TServerStatusImpl.fromThrift}
 * @return an immutable sorted map holding the status of each server that answered in time
 */
private SortedMap<TServerInstance, TabletServerStatus> gatherTableInformation(Set<TServerInstance> currentServers, SortedMap<TabletServerId, TServerStatus> balancerMap) {
    final long rpcTimeout = getConfiguration().getTimeInMillis(Property.GENERAL_RPC_TIMEOUT);
    int threads = getConfiguration().getCount(Property.MANAGER_STATUS_THREAD_POOL_SIZE);
    ExecutorService tp = ThreadPools.createExecutorService(getConfiguration(), Property.MANAGER_STATUS_THREAD_POOL_SIZE, false);
    long start = System.currentTimeMillis();
    // Worker threads write concurrently, so the accumulator must be a concurrent map.
    final SortedMap<TServerInstance, TabletServerStatus> result = new ConcurrentSkipListMap<>();
    final RateLimiter shutdownServerRateLimiter = RateLimiter.create(MAX_SHUTDOWNS_PER_SEC);
    for (TServerInstance serverInstance : currentServers) {
        final TServerInstance server = serverInstance;
        if (threads == 0) {
            // Since an unbounded thread pool is being used, rate limit how fast tasks are added to
            // the executor. This prevents the threads from growing large unless there are lots of
            // unresponsive tservers.
            sleepUninterruptibly(Math.max(1, rpcTimeout / 120_000), TimeUnit.MILLISECONDS);
        }
        tp.submit(() -> {
            try {
                Thread t = Thread.currentThread();
                String oldName = t.getName();
                try {
                    // Rename the worker thread while it talks to this server; restored in finally.
                    String message = "Getting status from " + server;
                    t.setName(message);
                    long startForServer = System.currentTimeMillis();
                    log.trace(message);
                    TServerConnection connection1 = tserverSet.getConnection(server);
                    if (connection1 == null) {
                        throw new IOException("No connection to " + server);
                    }
                    TabletServerStatus status = connection1.getTableMap(false);
                    result.put(server, status);
                    long duration = System.currentTimeMillis() - startForServer;
                    log.trace("Got status from {} in {} ms", server, duration);
                } finally {
                    t.setName(oldName);
                }
            } catch (Exception ex) {
                log.error("unable to get tablet server status {} {}", server, ex.toString());
                log.debug("unable to get tablet server status {}", server, ex);
                // Only attempt to halt the server if the rate limiter grants a permit. Either way
                // the server is removed from badServers below, so its status will be attempted
                // another MAX_BAD_STATUS_COUNT times before the next halt attempt.
                if (badServers.computeIfAbsent(server, k -> new AtomicInteger(0)).incrementAndGet() > MAX_BAD_STATUS_COUNT) {
                    if (shutdownServerRateLimiter.tryAcquire()) {
                        log.warn("attempting to stop {}", server);
                        try {
                            TServerConnection connection2 = tserverSet.getConnection(server);
                            if (connection2 != null) {
                                connection2.halt(managerLock);
                            }
                        } catch (TTransportException e1) {
                            // ignore: it's probably down
                        } catch (Exception e2) {
                            log.info("error talking to troublesome tablet server", e2);
                        }
                    } else {
                        log.warn("Unable to shutdown {} as over the shutdown limit of {} per minute", server, MAX_SHUTDOWNS_PER_SEC * 60);
                    }
                    badServers.remove(server);
                }
            }
        });
    }
    tp.shutdown();
    try {
        tp.awaitTermination(Math.max(10000, rpcTimeout / 3), TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
        log.debug("Interrupted while fetching status");
        // Restore the interrupt flag so the caller can observe the interruption.
        Thread.currentThread().interrupt();
    }
    tp.shutdownNow();
    // Threads may still modify map after shutdownNow is called, so create an immutable snapshot.
    SortedMap<TServerInstance, TabletServerStatus> info = ImmutableSortedMap.copyOf(result);
    // NOTE(review): this reads the tserverStatus field, not the freshly gathered 'info' snapshot.
    // Confirm against the caller whether the balancer map is meant to reflect 'info' instead.
    tserverStatus.forEach((tsi, status) -> balancerMap.put(new TabletServerIdImpl(tsi), TServerStatusImpl.fromThrift(status)));
    synchronized (badServers) {
        // Forget failure counts for servers that are gone or that just reported successfully.
        badServers.keySet().retainAll(currentServers);
        badServers.keySet().removeAll(info.keySet());
    }
    if (log.isDebugEnabled()) {
        // Guarded so the String.format cost is only paid when debug logging is on.
        log.debug(String.format("Finished gathering information from %d of %d servers in %.2f seconds", info.size(), currentServers.size(), (System.currentTimeMillis() - start) / 1000.));
    }
    return info;
}
Also used : MergeInfo(org.apache.accumulo.server.manager.state.MergeInfo) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) ManagerMonitorInfo(org.apache.accumulo.core.manager.thrift.ManagerMonitorInfo) TServer(org.apache.thrift.server.TServer) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Future(java.util.concurrent.Future) MergeState(org.apache.accumulo.server.manager.state.MergeState) Map(java.util.Map) RootTable(org.apache.accumulo.core.metadata.RootTable) ServerBulkImportStatus(org.apache.accumulo.server.util.ServerBulkImportStatus) ThriftServerType(org.apache.accumulo.server.rpc.ThriftServerType) ServerAddress(org.apache.accumulo.server.rpc.ServerAddress) BulkImportState(org.apache.accumulo.core.master.thrift.BulkImportState) Property(org.apache.accumulo.core.conf.Property) ServiceLockPath(org.apache.accumulo.fate.zookeeper.ServiceLock.ServiceLockPath) InstanceId(org.apache.accumulo.core.data.InstanceId) TableState(org.apache.accumulo.core.manager.state.tables.TableState) VolumeManager(org.apache.accumulo.server.fs.VolumeManager) UpgradeCoordinator(org.apache.accumulo.manager.upgrade.UpgradeCoordinator) Set(java.util.Set) AccumuloClient(org.apache.accumulo.core.client.AccumuloClient) TabletState(org.apache.accumulo.core.metadata.TabletState) NodeExistsPolicy(org.apache.accumulo.fate.zookeeper.ZooUtil.NodeExistsPolicy) TabletServerIdImpl(org.apache.accumulo.core.manager.balancer.TabletServerIdImpl) AuthenticationTokenSecretManager(org.apache.accumulo.server.security.delegation.AuthenticationTokenSecretManager) ManagerState(org.apache.accumulo.core.manager.thrift.ManagerState) ServiceLock(org.apache.accumulo.fate.zookeeper.ServiceLock) HighlyAvailableServiceWrapper(org.apache.accumulo.server.rpc.HighlyAvailableServiceWrapper) AuditedSecurityOperation(org.apache.accumulo.server.security.AuditedSecurityOperation) 
UtilWaitThread.sleepUninterruptibly(org.apache.accumulo.fate.util.UtilWaitThread.sleepUninterruptibly) NoAuthException(org.apache.zookeeper.KeeperException.NoAuthException) Scanner(org.apache.accumulo.core.client.Scanner) TableObserver(org.apache.accumulo.server.tables.TableObserver) SecurityOperation(org.apache.accumulo.server.security.SecurityOperation) TabletLocationState(org.apache.accumulo.core.metadata.TabletLocationState) Threads(org.apache.accumulo.core.util.threads.Threads) ZooUtil(org.apache.accumulo.fate.zookeeper.ZooUtil) AbstractServer(org.apache.accumulo.server.AbstractServer) TabletServerId(org.apache.accumulo.core.spi.balancer.data.TabletServerId) RateLimiter(com.google.common.util.concurrent.RateLimiter) ArrayList(java.util.ArrayList) AssignmentParamsImpl(org.apache.accumulo.core.manager.balancer.AssignmentParamsImpl) MetricsUtil(org.apache.accumulo.core.metrics.MetricsUtil) Iface(org.apache.accumulo.core.manager.thrift.ManagerClientService.Iface) Collections.emptySortedMap(java.util.Collections.emptySortedMap) ManagerGoalState(org.apache.accumulo.core.manager.thrift.ManagerGoalState) Key(org.apache.accumulo.core.data.Key) Fate(org.apache.accumulo.fate.Fate) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ServerContext(org.apache.accumulo.server.ServerContext) Watcher(org.apache.zookeeper.Watcher) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TException(org.apache.thrift.TException) IOException(java.io.IOException) ManagerMetrics(org.apache.accumulo.manager.metrics.ManagerMetrics) UnknownHostException(java.net.UnknownHostException) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) ExecutionException(java.util.concurrent.ExecutionException) TabletServerState(org.apache.accumulo.server.manager.state.TabletServerState) CurrentState(org.apache.accumulo.server.manager.state.CurrentState) TreeMap(java.util.TreeMap) 
Processor(org.apache.accumulo.core.manager.thrift.ManagerClientService.Processor) TabletBalancer(org.apache.accumulo.core.spi.balancer.TabletBalancer) TableId(org.apache.accumulo.core.data.TableId) ServerOpts(org.apache.accumulo.server.ServerOpts) LockLossReason(org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus) TTransportException(org.apache.thrift.transport.TTransportException) TCredentialsUpdatingWrapper(org.apache.accumulo.server.rpc.TCredentialsUpdatingWrapper) LoggerFactory(org.slf4j.LoggerFactory) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) TServerUtils(org.apache.accumulo.server.rpc.TServerUtils) ZooAuthenticationKeyDistributor(org.apache.accumulo.server.security.delegation.ZooAuthenticationKeyDistributor) TServerStatus(org.apache.accumulo.core.spi.balancer.data.TServerStatus) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RecoveryManager(org.apache.accumulo.manager.recovery.RecoveryManager) LiveTServerSet(org.apache.accumulo.server.manager.LiveTServerSet) Value(org.apache.accumulo.core.data.Value) TUnloadTabletGoal(org.apache.accumulo.core.tabletserver.thrift.TUnloadTabletGoal) SimpleLoadBalancer(org.apache.accumulo.core.spi.balancer.SimpleLoadBalancer) Span(io.opentelemetry.api.trace.Span) Collection(java.util.Collection) ThreadPools(org.apache.accumulo.core.util.threads.ThreadPools) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) TableCounts(org.apache.accumulo.manager.state.TableCounts) List(java.util.List) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) Entry(java.util.Map.Entry) TraceUtil(org.apache.accumulo.core.trace.TraceUtil) AuthenticationTokenKeyManager(org.apache.accumulo.server.security.delegation.AuthenticationTokenKeyManager) TableInfoUtil(org.apache.accumulo.server.util.TableInfoUtil) SortedMap(java.util.SortedMap) TableManager(org.apache.accumulo.server.tables.TableManager) 
SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) BalancerEnvironment(org.apache.accumulo.core.spi.balancer.BalancerEnvironment) TServerStatusImpl(org.apache.accumulo.core.manager.balancer.TServerStatusImpl) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) HashSet(java.util.HashSet) Halt(org.apache.accumulo.core.util.Halt) DeadServerList(org.apache.accumulo.server.manager.state.DeadServerList) TableOperationExceptionType(org.apache.accumulo.core.clientImpl.thrift.TableOperationExceptionType) ZooReaderWriter(org.apache.accumulo.fate.zookeeper.ZooReaderWriter) NodeMissingPolicy(org.apache.accumulo.fate.zookeeper.ZooUtil.NodeMissingPolicy) BalanceParamsImpl(org.apache.accumulo.core.manager.balancer.BalanceParamsImpl) ExecutorService(java.util.concurrent.ExecutorService) TServerConnection(org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection) Retry(org.apache.accumulo.fate.util.Retry) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) KeeperException(org.apache.zookeeper.KeeperException) Scope(io.opentelemetry.context.Scope) UTF_8(java.nio.charset.StandardCharsets.UTF_8) DataLevel(org.apache.accumulo.core.metadata.schema.Ample.DataLevel) HighlyAvailableService(org.apache.accumulo.server.HighlyAvailableService) TabletStateStore(org.apache.accumulo.server.manager.state.TabletStateStore) TabletMigration(org.apache.accumulo.core.spi.balancer.data.TabletMigration) Constants(org.apache.accumulo.core.Constants) Authorizations(org.apache.accumulo.core.security.Authorizations) WatchedEvent(org.apache.zookeeper.WatchedEvent) TimeUnit(java.util.concurrent.TimeUnit) TableOperation(org.apache.accumulo.core.clientImpl.thrift.TableOperation) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) ThriftTableOperationException(org.apache.accumulo.core.clientImpl.thrift.ThriftTableOperationException) 
AgeOffStore(org.apache.accumulo.fate.AgeOffStore) BalancerEnvironmentImpl(org.apache.accumulo.server.manager.balancer.BalancerEnvironmentImpl) TabletColumnFamily(org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily) TraceRepo(org.apache.accumulo.manager.tableOps.TraceRepo) Collections(java.util.Collections) ReplicationCoordinator(org.apache.accumulo.core.replication.thrift.ReplicationCoordinator) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) TTransportException(org.apache.thrift.transport.TTransportException) IOException(java.io.IOException) TabletServerIdImpl(org.apache.accumulo.core.manager.balancer.TabletServerIdImpl) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) RateLimiter(com.google.common.util.concurrent.RateLimiter) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) NoAuthException(org.apache.zookeeper.KeeperException.NoAuthException) TException(org.apache.thrift.TException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) ExecutionException(java.util.concurrent.ExecutionException) TTransportException(org.apache.thrift.transport.TTransportException) KeeperException(org.apache.zookeeper.KeeperException) ThriftTableOperationException(org.apache.accumulo.core.clientImpl.thrift.ThriftTableOperationException) TServerConnection(org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ExecutorService(java.util.concurrent.ExecutorService) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus)

Aggregations

ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap)1 RateLimiter (com.google.common.util.concurrent.RateLimiter)1 SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)1 Span (io.opentelemetry.api.trace.Span)1 Scope (io.opentelemetry.context.Scope)1 IOException (java.io.IOException)1 UnknownHostException (java.net.UnknownHostException)1 UTF_8 (java.nio.charset.StandardCharsets.UTF_8)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 Collections.emptySortedMap (java.util.Collections.emptySortedMap)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 Set (java.util.Set)1 SortedMap (java.util.SortedMap)1