Search in sources :

Example 1 with DeadServerList

use of org.apache.accumulo.server.manager.state.DeadServerList in project accumulo by apache.

the class TabletServerResource method clearDeadServer.

/**
 * REST endpoint that removes one entry from the dead server list.
 *
 * <p>Handles a POST carrying the entry to remove in the {@code server} query parameter.
 *
 * @param server
 *          the dead server entry to clear; declared non-null and validated against
 *          {@code HOSTNAME_PORT_REGEX} by the parameter annotations
 */
@POST
@Consumes(MediaType.TEXT_PLAIN)
public void clearDeadServer(
        @QueryParam("server") @NotNull @Pattern(regexp = HOSTNAME_PORT_REGEX) String server) {
    // A fresh DeadServerList bound to the monitor's context performs the deletion.
    new DeadServerList(monitor.getContext()).delete(server);
}
Also used : DeadServerList(org.apache.accumulo.server.manager.state.DeadServerList) POST(jakarta.ws.rs.POST) Consumes(jakarta.ws.rs.Consumes)

Example 2 with DeadServerList

use of org.apache.accumulo.server.manager.state.DeadServerList in project accumulo by apache.

the class Manager method getManagerMonitorInfo.

/**
 * Assembles a {@link ManagerMonitorInfo} snapshot from the manager's current bookkeeping.
 *
 * <p>The snapshot contains: every known tablet server status, per-table information merged
 * across those statuses, the bad servers (each reported with the
 * {@code TabletServerState.UNRESPONSIVE} id), the manager state and goal state, the
 * unassigned tablet value from {@code displayUnassigned()}, the servers being shut down, the
 * dead server list and the bulk import status.
 *
 * @return a newly created, fully populated monitor info object
 */
public ManagerMonitorInfo getManagerMonitorInfo() {
    final ManagerMonitorInfo info = new ManagerMonitorInfo();

    // Collect each server's status and fold its per-table data into the combined table map.
    info.tServerInfo = new ArrayList<>();
    info.tableMap = new HashMap<>();
    for (TabletServerStatus serverStatus : tserverStatus.values()) {
        info.tServerInfo.add(serverStatus);
        serverStatus.tableMap.forEach((tableKey, tableInfo) -> {
            TableInfo combined = info.tableMap.computeIfAbsent(tableKey, k -> new TableInfo());
            TableInfoUtil.add(combined, tableInfo);
        });
    }

    // Bad servers are reported by host:port; hold the lock while walking the key set.
    info.badTServers = new HashMap<>();
    synchronized (badServers) {
        badServers.keySet().forEach(
            badServer -> info.badTServers.put(badServer.getHostPort(), TabletServerState.UNRESPONSIVE.getId()));
    }

    info.state = getManagerState();
    info.goalState = getManagerGoalState();
    info.unassignedTablets = displayUnassigned();

    // Servers pending shutdown, again reported by host:port under the collection's lock.
    info.serversShuttingDown = new HashSet<>();
    synchronized (serversToShutdown) {
        serversToShutdown.forEach(pending -> info.serversShuttingDown.add(pending.getHostPort()));
    }

    info.deadTabletServers = new DeadServerList(getContext()).getList();
    info.bulkImports = bulkImportStatus.getBulkLoadStatus();
    return info;
}
Also used : MergeInfo(org.apache.accumulo.server.manager.state.MergeInfo) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) ManagerMonitorInfo(org.apache.accumulo.core.manager.thrift.ManagerMonitorInfo) TServer(org.apache.thrift.server.TServer) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Future(java.util.concurrent.Future) MergeState(org.apache.accumulo.server.manager.state.MergeState) Map(java.util.Map) RootTable(org.apache.accumulo.core.metadata.RootTable) ServerBulkImportStatus(org.apache.accumulo.server.util.ServerBulkImportStatus) ThriftServerType(org.apache.accumulo.server.rpc.ThriftServerType) ServerAddress(org.apache.accumulo.server.rpc.ServerAddress) BulkImportState(org.apache.accumulo.core.master.thrift.BulkImportState) Property(org.apache.accumulo.core.conf.Property) ServiceLockPath(org.apache.accumulo.fate.zookeeper.ServiceLock.ServiceLockPath) InstanceId(org.apache.accumulo.core.data.InstanceId) TableState(org.apache.accumulo.core.manager.state.tables.TableState) VolumeManager(org.apache.accumulo.server.fs.VolumeManager) UpgradeCoordinator(org.apache.accumulo.manager.upgrade.UpgradeCoordinator) Set(java.util.Set) AccumuloClient(org.apache.accumulo.core.client.AccumuloClient) TabletState(org.apache.accumulo.core.metadata.TabletState) NodeExistsPolicy(org.apache.accumulo.fate.zookeeper.ZooUtil.NodeExistsPolicy) TabletServerIdImpl(org.apache.accumulo.core.manager.balancer.TabletServerIdImpl) AuthenticationTokenSecretManager(org.apache.accumulo.server.security.delegation.AuthenticationTokenSecretManager) ManagerState(org.apache.accumulo.core.manager.thrift.ManagerState) ServiceLock(org.apache.accumulo.fate.zookeeper.ServiceLock) HighlyAvailableServiceWrapper(org.apache.accumulo.server.rpc.HighlyAvailableServiceWrapper) AuditedSecurityOperation(org.apache.accumulo.server.security.AuditedSecurityOperation) 
UtilWaitThread.sleepUninterruptibly(org.apache.accumulo.fate.util.UtilWaitThread.sleepUninterruptibly) NoAuthException(org.apache.zookeeper.KeeperException.NoAuthException) Scanner(org.apache.accumulo.core.client.Scanner) TableObserver(org.apache.accumulo.server.tables.TableObserver) SecurityOperation(org.apache.accumulo.server.security.SecurityOperation) TabletLocationState(org.apache.accumulo.core.metadata.TabletLocationState) Threads(org.apache.accumulo.core.util.threads.Threads) ZooUtil(org.apache.accumulo.fate.zookeeper.ZooUtil) AbstractServer(org.apache.accumulo.server.AbstractServer) TabletServerId(org.apache.accumulo.core.spi.balancer.data.TabletServerId) RateLimiter(com.google.common.util.concurrent.RateLimiter) ArrayList(java.util.ArrayList) AssignmentParamsImpl(org.apache.accumulo.core.manager.balancer.AssignmentParamsImpl) MetricsUtil(org.apache.accumulo.core.metrics.MetricsUtil) Iface(org.apache.accumulo.core.manager.thrift.ManagerClientService.Iface) Collections.emptySortedMap(java.util.Collections.emptySortedMap) ManagerGoalState(org.apache.accumulo.core.manager.thrift.ManagerGoalState) Key(org.apache.accumulo.core.data.Key) Fate(org.apache.accumulo.fate.Fate) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ServerContext(org.apache.accumulo.server.ServerContext) Watcher(org.apache.zookeeper.Watcher) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TException(org.apache.thrift.TException) IOException(java.io.IOException) ManagerMetrics(org.apache.accumulo.manager.metrics.ManagerMetrics) UnknownHostException(java.net.UnknownHostException) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) ExecutionException(java.util.concurrent.ExecutionException) TabletServerState(org.apache.accumulo.server.manager.state.TabletServerState) CurrentState(org.apache.accumulo.server.manager.state.CurrentState) TreeMap(java.util.TreeMap) 
Processor(org.apache.accumulo.core.manager.thrift.ManagerClientService.Processor) TabletBalancer(org.apache.accumulo.core.spi.balancer.TabletBalancer) TableId(org.apache.accumulo.core.data.TableId) ServerOpts(org.apache.accumulo.server.ServerOpts) LockLossReason(org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus) TTransportException(org.apache.thrift.transport.TTransportException) TCredentialsUpdatingWrapper(org.apache.accumulo.server.rpc.TCredentialsUpdatingWrapper) LoggerFactory(org.slf4j.LoggerFactory) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) TServerUtils(org.apache.accumulo.server.rpc.TServerUtils) ZooAuthenticationKeyDistributor(org.apache.accumulo.server.security.delegation.ZooAuthenticationKeyDistributor) TServerStatus(org.apache.accumulo.core.spi.balancer.data.TServerStatus) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RecoveryManager(org.apache.accumulo.manager.recovery.RecoveryManager) LiveTServerSet(org.apache.accumulo.server.manager.LiveTServerSet) Value(org.apache.accumulo.core.data.Value) TUnloadTabletGoal(org.apache.accumulo.core.tabletserver.thrift.TUnloadTabletGoal) SimpleLoadBalancer(org.apache.accumulo.core.spi.balancer.SimpleLoadBalancer) Span(io.opentelemetry.api.trace.Span) Collection(java.util.Collection) ThreadPools(org.apache.accumulo.core.util.threads.ThreadPools) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) TableCounts(org.apache.accumulo.manager.state.TableCounts) List(java.util.List) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) Entry(java.util.Map.Entry) TraceUtil(org.apache.accumulo.core.trace.TraceUtil) AuthenticationTokenKeyManager(org.apache.accumulo.server.security.delegation.AuthenticationTokenKeyManager) TableInfoUtil(org.apache.accumulo.server.util.TableInfoUtil) SortedMap(java.util.SortedMap) TableManager(org.apache.accumulo.server.tables.TableManager) 
SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) BalancerEnvironment(org.apache.accumulo.core.spi.balancer.BalancerEnvironment) TServerStatusImpl(org.apache.accumulo.core.manager.balancer.TServerStatusImpl) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) HashSet(java.util.HashSet) Halt(org.apache.accumulo.core.util.Halt) DeadServerList(org.apache.accumulo.server.manager.state.DeadServerList) TableOperationExceptionType(org.apache.accumulo.core.clientImpl.thrift.TableOperationExceptionType) ZooReaderWriter(org.apache.accumulo.fate.zookeeper.ZooReaderWriter) NodeMissingPolicy(org.apache.accumulo.fate.zookeeper.ZooUtil.NodeMissingPolicy) BalanceParamsImpl(org.apache.accumulo.core.manager.balancer.BalanceParamsImpl) ExecutorService(java.util.concurrent.ExecutorService) TServerConnection(org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection) Retry(org.apache.accumulo.fate.util.Retry) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) KeeperException(org.apache.zookeeper.KeeperException) Scope(io.opentelemetry.context.Scope) UTF_8(java.nio.charset.StandardCharsets.UTF_8) DataLevel(org.apache.accumulo.core.metadata.schema.Ample.DataLevel) HighlyAvailableService(org.apache.accumulo.server.HighlyAvailableService) TabletStateStore(org.apache.accumulo.server.manager.state.TabletStateStore) TabletMigration(org.apache.accumulo.core.spi.balancer.data.TabletMigration) Constants(org.apache.accumulo.core.Constants) Authorizations(org.apache.accumulo.core.security.Authorizations) WatchedEvent(org.apache.zookeeper.WatchedEvent) TimeUnit(java.util.concurrent.TimeUnit) TableOperation(org.apache.accumulo.core.clientImpl.thrift.TableOperation) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) ThriftTableOperationException(org.apache.accumulo.core.clientImpl.thrift.ThriftTableOperationException) 
AgeOffStore(org.apache.accumulo.fate.AgeOffStore) BalancerEnvironmentImpl(org.apache.accumulo.server.manager.balancer.BalancerEnvironmentImpl) TabletColumnFamily(org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily) TraceRepo(org.apache.accumulo.manager.tableOps.TraceRepo) Collections(java.util.Collections) ReplicationCoordinator(org.apache.accumulo.core.replication.thrift.ReplicationCoordinator) ManagerMonitorInfo(org.apache.accumulo.core.manager.thrift.ManagerMonitorInfo) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) DeadServerList(org.apache.accumulo.server.manager.state.DeadServerList) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus)

Example 3 with DeadServerList

use of org.apache.accumulo.server.manager.state.DeadServerList in project accumulo by apache.

the class Manager method update.

/**
 * Reacts to a change in the set of live tablet servers.
 *
 * <p>When servers were added or deleted, this updates the dead server list, logs the change,
 * prunes {@code serversToShutdown} and {@code badServers}, cancels migrations whose target was
 * deleted, and raises an event carrying the current server count. Regardless of whether
 * anything changed, {@code serversToShutdown} is finally restricted to the current servers.
 *
 * @param current
 *          the live tablet server set after the change
 * @param deleted
 *          servers that are no longer present
 * @param added
 *          servers that have newly appeared
 */
@Override
public void update(LiveTServerSet current, Set<TServerInstance> deleted, Set<TServerInstance> added) {
    // if we have deleted or added tservers, then adjust our dead server list
    final boolean nothingChanged = deleted.isEmpty() && added.isEmpty();
    if (!nothingChanged) {
        final DeadServerList deadList = new DeadServerList(getContext());

        // A server that has come (back) up must no longer be listed as dead.
        if (!added.isEmpty()) {
            log.info("New servers: {}", added);
            added.forEach(arrived -> deadList.delete(arrived.getHostPort()));
        }

        for (TServerInstance gone : deleted) {
            // treating a server that was scheduled for shutdown as a clean shutdown
            // is maybe an incorrect assumption
            final String cause = serversToShutdown.contains(gone) ? "clean shutdown" : "unexpected failure";
            // Nothing is posted while the manager's goal state is CLEAN_STOP.
            if (!getManagerGoalState().equals(ManagerGoalState.CLEAN_STOP)) {
                deadList.post(gone.getHostPort(), cause);
            }
        }

        // Warn only about losses that were not requested shutdowns.
        final Set<TServerInstance> unexpected = new HashSet<>(deleted);
        unexpected.removeAll(this.serversToShutdown);
        if (!unexpected.isEmpty() && stillManager()
                && !getManagerGoalState().equals(ManagerGoalState.CLEAN_STOP)) {
            log.warn("Lost servers {}", unexpected);
        }

        serversToShutdown.removeAll(deleted);
        badServers.keySet().removeAll(deleted);

        // clear out any bad server with the same host/port as a new server
        synchronized (badServers) {
            cleanListByHostAndPort(badServers.keySet(), deleted, added);
        }
        synchronized (serversToShutdown) {
            cleanListByHostAndPort(serversToShutdown, deleted, added);
        }

        // Drop every pending migration whose destination server was deleted.
        synchronized (migrations) {
            migrations.entrySet().removeIf(migration -> {
                if (!deleted.contains(migration.getValue())) {
                    return false;
                }
                log.info("Canceling migration of {} to {}", migration.getKey(), migration.getValue());
                return true;
            });
        }

        nextEvent.event("There are now %d tablet servers", current.size());
    }

    // clear out any servers that are no longer current
    // this is needed when we are using a fate operation to shutdown a tserver as it
    // will continue to add the server to the serversToShutdown (ACCUMULO-4410)
    serversToShutdown.retainAll(current.getCurrentServers());
}
Also used : Entry(java.util.Map.Entry) DeadServerList(org.apache.accumulo.server.manager.state.DeadServerList) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) HashSet(java.util.HashSet)

Aggregations

DeadServerList (org.apache.accumulo.server.manager.state.DeadServerList): 3, HashSet (java.util.HashSet): 2, Entry (java.util.Map.Entry): 2, KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent): 2, TServerInstance (org.apache.accumulo.core.metadata.TServerInstance): 2, ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap): 1, RateLimiter (com.google.common.util.concurrent.RateLimiter): 1, SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings): 1, Span (io.opentelemetry.api.trace.Span): 1, Scope (io.opentelemetry.context.Scope): 1, Consumes (jakarta.ws.rs.Consumes): 1, POST (jakarta.ws.rs.POST): 1, IOException (java.io.IOException): 1, UnknownHostException (java.net.UnknownHostException): 1, UTF_8 (java.nio.charset.StandardCharsets.UTF_8): 1, ArrayList (java.util.ArrayList): 1, Collection (java.util.Collection): 1, Collections (java.util.Collections): 1, Collections.emptySortedMap (java.util.Collections.emptySortedMap): 1, HashMap (java.util.HashMap): 1