Search in sources :

Example 1 with TabletServerId

use of org.apache.accumulo.core.spi.balancer.data.TabletServerId in project accumulo by apache.

the class GroupBalancer method populateMigrations.

/**
 * Turns the planned {@code moves} into {@link TabletMigration} entries appended to
 * {@code migrationsOut}.
 *
 * <p>Every tablet reported by the location provider is visited. If any tablet has no location,
 * or a location that is not in {@code current}, everything added to {@code migrationsOut} is
 * discarded and the method returns immediately.
 *
 * @param current tablet servers a tablet location must belong to
 * @param migrationsOut output list receiving the migrations; cleared when an unknown location is
 *        encountered
 * @param moves pending moves; one is removed for each migration produced
 */
private void populateMigrations(Set<TabletServerId> current, List<TabletMigration> migrationsOut, Moves moves) {
    if (moves.size() == 0) {
        return;
    }
    final Function<TabletId, String> groupOf = getPartitioner();
    for (var entry : getLocationProvider().entrySet()) {
        final var tabletId = entry.getKey();
        final String groupName = groupOf.apply(tabletId);
        final var location = entry.getValue();
        final boolean onKnownServer = location != null && current.contains(location);
        if (!onKnownServer) {
            // The location data does not match the given server set; give up on this pass.
            migrationsOut.clear();
            return;
        }
        final TabletServerId target = moves.removeMove(location, groupName);
        if (target == null) {
            // No move is planned for this server/group pair.
            continue;
        }
        migrationsOut.add(new TabletMigration(tabletId, location, target));
        if (moves.size() == 0) {
            // Every planned move has been consumed.
            return;
        }
    }
}
Also used : TabletMigration(org.apache.accumulo.core.spi.balancer.data.TabletMigration) TabletServerId(org.apache.accumulo.core.spi.balancer.data.TabletServerId) TabletId(org.apache.accumulo.core.data.TabletId)

Example 2 with TabletServerId

use of org.apache.accumulo.core.spi.balancer.data.TabletServerId in project accumulo by apache.

the class GroupBalancer method balance.

/**
 * Runs one balancing pass: gathers per-group tablet counts for every tablet server, plans moves
 * in four ordered steps, and writes the resulting migrations to {@code params.migrationsOut()}.
 *
 * <p>The pass is skipped when {@code shouldBalance} declines, when less than
 * {@code getWaitTime()} milliseconds have elapsed since the last completed run, or when a tablet
 * has no location or is located on a server missing from {@code params.currentStatus()}.
 *
 * @param params current server status, current migrations, and the migration output list
 * @return always {@code 5000} (presumably a delay in milliseconds before the next call; confirm
 *         against the balancer interface)
 */
@Override
public long balance(BalanceParameters params) {
    if (!shouldBalance(params.currentStatus(), params.currentMigrations()) || System.currentTimeMillis() - lastRun < getWaitTime()) {
        return 5000;
    }
    final MapCounter<String> tabletsPerGroup = new MapCounter<>();
    final Map<TabletServerId, TserverGroupInfo> infoByServer = new HashMap<>();
    params.currentStatus().keySet().forEach(serverId -> infoByServer.put(serverId, new TserverGroupInfo(serverId)));
    final Function<TabletId, String> groupOf = getPartitioner();
    // collect stats about current state
    for (var entry : getLocationProvider().entrySet()) {
        final String groupName = groupOf.apply(entry.getKey());
        final var location = entry.getValue();
        // Every value in infoByServer is non-null, so a null lookup means "unknown server".
        final TserverGroupInfo serverInfo = location == null ? null : infoByServer.get(location);
        if (serverInfo == null) {
            return 5000;
        }
        tabletsPerGroup.increment(groupName, 1);
        serverInfo.addGroup(groupName);
    }
    final int serverCount = params.currentStatus().size();
    final Map<String, Integer> evenShare = new HashMap<>();
    int leftoverTotal = 0;
    for (String groupName : tabletsPerGroup.keySet()) {
        final int groupSize = tabletsPerGroup.getInt(groupName);
        evenShare.put(groupName, groupSize / serverCount);
        leftoverTotal += groupSize % serverCount;
    }
    // The number of extra tablets from all groups that each tserver must have.
    final int expectedExtra = leftoverTotal / serverCount;
    final int maxExtraGroups = expectedExtra + 1;
    // Freeze both maps before handing them to the planning steps.
    final Map<String, Integer> expectedCounts = Collections.unmodifiableMap(evenShare);
    final Map<TabletServerId, TserverGroupInfo> tservers = Collections.unmodifiableMap(infoByServer);
    tservers.values().forEach(serverInfo -> serverInfo.finishedAdding(expectedCounts));
    final Moves moves = new Moves();
    // The order of the following steps is important, because as ordered each step should not move
    // any tablets moved by a previous step.
    balanceExpected(tservers, moves);
    boolean roomLeft = moves.size() < getMaxMigrations();
    if (roomLeft) {
        balanceExtraExpected(tservers, expectedExtra, moves);
        roomLeft = moves.size() < getMaxMigrations();
    }
    if (roomLeft) {
        // Short-circuit keeps the size check from running when the step asks to stop.
        roomLeft = balanceExtraMultiple(tservers, maxExtraGroups, moves) && moves.size() < getMaxMigrations();
    }
    if (roomLeft) {
        balanceExtraExtra(tservers, maxExtraGroups, moves);
    }
    populateMigrations(tservers.keySet(), params.migrationsOut(), moves);
    lastRun = System.currentTimeMillis();
    return 5000;
}
Also used : HashMap(java.util.HashMap) TabletServerId(org.apache.accumulo.core.spi.balancer.data.TabletServerId) MapCounter(org.apache.accumulo.core.util.MapCounter) TabletId(org.apache.accumulo.core.data.TabletId)

Example 3 with TabletServerId

use of org.apache.accumulo.core.spi.balancer.data.TabletServerId in project accumulo by apache.

the class TableLoadBalancer method getAssignments.

/**
 * Splits the unassigned tablets by table, asks each table's balancer for assignments, and
 * forwards every assignment it produces to {@code params}.
 *
 * @param params supplies the unassigned tablets and current server status, and receives the
 *        resulting assignments through {@code addAssignment}
 */
@Override
public void getAssignments(AssignmentParameters params) {
    // Bucket the unassigned tablets by the table they belong to.
    final Map<TableId, Map<TabletId, TabletServerId>> unassignedByTable = new HashMap<>();
    for (var unassigned : params.unassignedTablets().entrySet()) {
        final TabletId tablet = unassigned.getKey();
        final Map<TabletId, TabletServerId> bucket = unassignedByTable.computeIfAbsent(tablet.getTable(), unused -> new HashMap<>());
        bucket.put(tablet, unassigned.getValue());
    }
    // Delegate each bucket to the balancer configured for that table.
    unassignedByTable.forEach((tableId, tablets) -> {
        final Map<TabletId, TabletServerId> chosen = new HashMap<>();
        getBalancerForTable(tableId).getAssignments(new AssignmentParamsImpl(params.currentStatus(), tablets, chosen));
        for (Entry<TabletId, TabletServerId> pick : chosen.entrySet()) {
            params.addAssignment(pick.getKey(), pick.getValue());
        }
    });
}
Also used : TableId(org.apache.accumulo.core.data.TableId) HashMap(java.util.HashMap) TabletServerId(org.apache.accumulo.core.spi.balancer.data.TabletServerId) TabletId(org.apache.accumulo.core.data.TabletId) AssignmentParamsImpl(org.apache.accumulo.core.manager.balancer.AssignmentParamsImpl) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with TabletServerId

use of org.apache.accumulo.core.spi.balancer.data.TabletServerId in project accumulo by apache.

the class Manager method gatherTableInformation.

/**
 * Requests the status of every server in {@code currentServers} on a thread pool and returns an
 * immutable snapshot of the statuses that arrived in time.
 *
 * <p>The method waits at most {@code max(10000, rpcTimeout / 3)} milliseconds for the requests
 * to finish. A server whose status request fails more than {@code MAX_BAD_STATUS_COUNT} times is
 * asked to halt, subject to the shutdown rate limiter, and its failure count is then reset.
 *
 * <p>If the calling thread is interrupted while waiting, the wait ends early and the thread's
 * interrupt flag is restored so that callers can observe the interruption.
 *
 * @param currentServers servers to query
 * @param balancerMap output map; receives one converted entry per entry of the
 *        {@code tserverStatus} field
 * @return immutable snapshot of the statuses gathered during this call
 */
private SortedMap<TServerInstance, TabletServerStatus> gatherTableInformation(Set<TServerInstance> currentServers, SortedMap<TabletServerId, TServerStatus> balancerMap) {
    final long rpcTimeout = getConfiguration().getTimeInMillis(Property.GENERAL_RPC_TIMEOUT);
    int threads = getConfiguration().getCount(Property.MANAGER_STATUS_THREAD_POOL_SIZE);
    ExecutorService tp = ThreadPools.createExecutorService(getConfiguration(), Property.MANAGER_STATUS_THREAD_POOL_SIZE, false);
    long start = System.currentTimeMillis();
    final SortedMap<TServerInstance, TabletServerStatus> result = new ConcurrentSkipListMap<>();
    final RateLimiter shutdownServerRateLimiter = RateLimiter.create(MAX_SHUTDOWNS_PER_SEC);
    for (TServerInstance serverInstance : currentServers) {
        final TServerInstance server = serverInstance;
        if (threads == 0) {
            // Since an unbounded thread pool is being used, rate limit how fast tasks are added to the
            // executor. This prevents the threads from growing large unless there are lots of
            // unresponsive tservers.
            sleepUninterruptibly(Math.max(1, rpcTimeout / 120_000), TimeUnit.MILLISECONDS);
        }
        tp.submit(() -> {
            try {
                Thread t = Thread.currentThread();
                String oldName = t.getName();
                try {
                    // Rename the worker thread while it talks to this server; restored in finally.
                    String message = "Getting status from " + server;
                    t.setName(message);
                    long startForServer = System.currentTimeMillis();
                    log.trace(message);
                    TServerConnection connection1 = tserverSet.getConnection(server);
                    if (connection1 == null) {
                        throw new IOException("No connection to " + server);
                    }
                    TabletServerStatus status = connection1.getTableMap(false);
                    result.put(server, status);
                    long duration = System.currentTimeMillis() - startForServer;
                    log.trace("Got status from {} in {} ms", server, duration);
                } finally {
                    t.setName(oldName);
                }
            } catch (Exception ex) {
                log.error("unable to get tablet server status {} {}", server, ex.toString());
                log.debug("unable to get tablet server status {}", server, ex);
                // Only try to halt a server once it has failed more than MAX_BAD_STATUS_COUNT status
                // requests. Whether or not the halt is attempted, the server's failure count is removed
                // afterwards, so it has to fail that many times again before the next attempt.
                if (badServers.computeIfAbsent(server, k -> new AtomicInteger(0)).incrementAndGet() > MAX_BAD_STATUS_COUNT) {
                    if (shutdownServerRateLimiter.tryAcquire()) {
                        log.warn("attempting to stop {}", server);
                        try {
                            TServerConnection connection2 = tserverSet.getConnection(server);
                            if (connection2 != null) {
                                connection2.halt(managerLock);
                            }
                        } catch (TTransportException e1) {
                        // ignore: it's probably down
                        } catch (Exception e2) {
                            log.info("error talking to troublesome tablet server", e2);
                        }
                    } else {
                        log.warn("Unable to shutdown {} as over the shutdown limit of {} per minute", server, MAX_SHUTDOWNS_PER_SEC * 60);
                    }
                    badServers.remove(server);
                }
            }
        });
    }
    tp.shutdown();
    try {
        tp.awaitTermination(Math.max(10000, rpcTimeout / 3), TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
        log.debug("Interrupted while fetching status");
        // Catching InterruptedException clears the interrupt flag; set it again so the caller can
        // still see that this thread was interrupted.
        Thread.currentThread().interrupt();
    }
    tp.shutdownNow();
    // Threads may still modify map after shutdownNow is called, so create an immutable snapshot.
    SortedMap<TServerInstance, TabletServerStatus> info = ImmutableSortedMap.copyOf(result);
    // NOTE(review): this reads the tserverStatus field, not the snapshot `info` gathered above.
    // Confirm that feeding balancerMap from the field is intended.
    tserverStatus.forEach((tsi, status) -> balancerMap.put(new TabletServerIdImpl(tsi), TServerStatusImpl.fromThrift(status)));
    synchronized (badServers) {
        // Drop failure counts for servers that are gone or that answered during this call.
        badServers.keySet().retainAll(currentServers);
        badServers.keySet().removeAll(info.keySet());
    }
    log.debug(String.format("Finished gathering information from %d of %d servers in %.2f seconds", info.size(), currentServers.size(), (System.currentTimeMillis() - start) / 1000.));
    return info;
}
Also used : MergeInfo(org.apache.accumulo.server.manager.state.MergeInfo) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) ManagerMonitorInfo(org.apache.accumulo.core.manager.thrift.ManagerMonitorInfo) TServer(org.apache.thrift.server.TServer) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Future(java.util.concurrent.Future) MergeState(org.apache.accumulo.server.manager.state.MergeState) Map(java.util.Map) RootTable(org.apache.accumulo.core.metadata.RootTable) ServerBulkImportStatus(org.apache.accumulo.server.util.ServerBulkImportStatus) ThriftServerType(org.apache.accumulo.server.rpc.ThriftServerType) ServerAddress(org.apache.accumulo.server.rpc.ServerAddress) BulkImportState(org.apache.accumulo.core.master.thrift.BulkImportState) Property(org.apache.accumulo.core.conf.Property) ServiceLockPath(org.apache.accumulo.fate.zookeeper.ServiceLock.ServiceLockPath) InstanceId(org.apache.accumulo.core.data.InstanceId) TableState(org.apache.accumulo.core.manager.state.tables.TableState) VolumeManager(org.apache.accumulo.server.fs.VolumeManager) UpgradeCoordinator(org.apache.accumulo.manager.upgrade.UpgradeCoordinator) Set(java.util.Set) AccumuloClient(org.apache.accumulo.core.client.AccumuloClient) TabletState(org.apache.accumulo.core.metadata.TabletState) NodeExistsPolicy(org.apache.accumulo.fate.zookeeper.ZooUtil.NodeExistsPolicy) TabletServerIdImpl(org.apache.accumulo.core.manager.balancer.TabletServerIdImpl) AuthenticationTokenSecretManager(org.apache.accumulo.server.security.delegation.AuthenticationTokenSecretManager) ManagerState(org.apache.accumulo.core.manager.thrift.ManagerState) ServiceLock(org.apache.accumulo.fate.zookeeper.ServiceLock) HighlyAvailableServiceWrapper(org.apache.accumulo.server.rpc.HighlyAvailableServiceWrapper) AuditedSecurityOperation(org.apache.accumulo.server.security.AuditedSecurityOperation) 
UtilWaitThread.sleepUninterruptibly(org.apache.accumulo.fate.util.UtilWaitThread.sleepUninterruptibly) NoAuthException(org.apache.zookeeper.KeeperException.NoAuthException) Scanner(org.apache.accumulo.core.client.Scanner) TableObserver(org.apache.accumulo.server.tables.TableObserver) SecurityOperation(org.apache.accumulo.server.security.SecurityOperation) TabletLocationState(org.apache.accumulo.core.metadata.TabletLocationState) Threads(org.apache.accumulo.core.util.threads.Threads) ZooUtil(org.apache.accumulo.fate.zookeeper.ZooUtil) AbstractServer(org.apache.accumulo.server.AbstractServer) TabletServerId(org.apache.accumulo.core.spi.balancer.data.TabletServerId) RateLimiter(com.google.common.util.concurrent.RateLimiter) ArrayList(java.util.ArrayList) AssignmentParamsImpl(org.apache.accumulo.core.manager.balancer.AssignmentParamsImpl) MetricsUtil(org.apache.accumulo.core.metrics.MetricsUtil) Iface(org.apache.accumulo.core.manager.thrift.ManagerClientService.Iface) Collections.emptySortedMap(java.util.Collections.emptySortedMap) ManagerGoalState(org.apache.accumulo.core.manager.thrift.ManagerGoalState) Key(org.apache.accumulo.core.data.Key) Fate(org.apache.accumulo.fate.Fate) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ServerContext(org.apache.accumulo.server.ServerContext) Watcher(org.apache.zookeeper.Watcher) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TException(org.apache.thrift.TException) IOException(java.io.IOException) ManagerMetrics(org.apache.accumulo.manager.metrics.ManagerMetrics) UnknownHostException(java.net.UnknownHostException) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) ExecutionException(java.util.concurrent.ExecutionException) TabletServerState(org.apache.accumulo.server.manager.state.TabletServerState) CurrentState(org.apache.accumulo.server.manager.state.CurrentState) TreeMap(java.util.TreeMap) 
Processor(org.apache.accumulo.core.manager.thrift.ManagerClientService.Processor) TabletBalancer(org.apache.accumulo.core.spi.balancer.TabletBalancer) TableId(org.apache.accumulo.core.data.TableId) ServerOpts(org.apache.accumulo.server.ServerOpts) LockLossReason(org.apache.accumulo.fate.zookeeper.ServiceLock.LockLossReason) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus) TTransportException(org.apache.thrift.transport.TTransportException) TCredentialsUpdatingWrapper(org.apache.accumulo.server.rpc.TCredentialsUpdatingWrapper) LoggerFactory(org.slf4j.LoggerFactory) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) TServerUtils(org.apache.accumulo.server.rpc.TServerUtils) ZooAuthenticationKeyDistributor(org.apache.accumulo.server.security.delegation.ZooAuthenticationKeyDistributor) TServerStatus(org.apache.accumulo.core.spi.balancer.data.TServerStatus) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RecoveryManager(org.apache.accumulo.manager.recovery.RecoveryManager) LiveTServerSet(org.apache.accumulo.server.manager.LiveTServerSet) Value(org.apache.accumulo.core.data.Value) TUnloadTabletGoal(org.apache.accumulo.core.tabletserver.thrift.TUnloadTabletGoal) SimpleLoadBalancer(org.apache.accumulo.core.spi.balancer.SimpleLoadBalancer) Span(io.opentelemetry.api.trace.Span) Collection(java.util.Collection) ThreadPools(org.apache.accumulo.core.util.threads.ThreadPools) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) TableCounts(org.apache.accumulo.manager.state.TableCounts) List(java.util.List) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) Entry(java.util.Map.Entry) TraceUtil(org.apache.accumulo.core.trace.TraceUtil) AuthenticationTokenKeyManager(org.apache.accumulo.server.security.delegation.AuthenticationTokenKeyManager) TableInfoUtil(org.apache.accumulo.server.util.TableInfoUtil) SortedMap(java.util.SortedMap) TableManager(org.apache.accumulo.server.tables.TableManager) 
SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) BalancerEnvironment(org.apache.accumulo.core.spi.balancer.BalancerEnvironment) TServerStatusImpl(org.apache.accumulo.core.manager.balancer.TServerStatusImpl) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) AtomicReference(java.util.concurrent.atomic.AtomicReference) HashSet(java.util.HashSet) Halt(org.apache.accumulo.core.util.Halt) DeadServerList(org.apache.accumulo.server.manager.state.DeadServerList) TableOperationExceptionType(org.apache.accumulo.core.clientImpl.thrift.TableOperationExceptionType) ZooReaderWriter(org.apache.accumulo.fate.zookeeper.ZooReaderWriter) NodeMissingPolicy(org.apache.accumulo.fate.zookeeper.ZooUtil.NodeMissingPolicy) BalanceParamsImpl(org.apache.accumulo.core.manager.balancer.BalanceParamsImpl) ExecutorService(java.util.concurrent.ExecutorService) TServerConnection(org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection) Retry(org.apache.accumulo.fate.util.Retry) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) KeeperException(org.apache.zookeeper.KeeperException) Scope(io.opentelemetry.context.Scope) UTF_8(java.nio.charset.StandardCharsets.UTF_8) DataLevel(org.apache.accumulo.core.metadata.schema.Ample.DataLevel) HighlyAvailableService(org.apache.accumulo.server.HighlyAvailableService) TabletStateStore(org.apache.accumulo.server.manager.state.TabletStateStore) TabletMigration(org.apache.accumulo.core.spi.balancer.data.TabletMigration) Constants(org.apache.accumulo.core.Constants) Authorizations(org.apache.accumulo.core.security.Authorizations) WatchedEvent(org.apache.zookeeper.WatchedEvent) TimeUnit(java.util.concurrent.TimeUnit) TableOperation(org.apache.accumulo.core.clientImpl.thrift.TableOperation) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) ThriftTableOperationException(org.apache.accumulo.core.clientImpl.thrift.ThriftTableOperationException) 
AgeOffStore(org.apache.accumulo.fate.AgeOffStore) BalancerEnvironmentImpl(org.apache.accumulo.server.manager.balancer.BalancerEnvironmentImpl) TabletColumnFamily(org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily) TraceRepo(org.apache.accumulo.manager.tableOps.TraceRepo) Collections(java.util.Collections) ReplicationCoordinator(org.apache.accumulo.core.replication.thrift.ReplicationCoordinator) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) TTransportException(org.apache.thrift.transport.TTransportException) IOException(java.io.IOException) TabletServerIdImpl(org.apache.accumulo.core.manager.balancer.TabletServerIdImpl) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) RateLimiter(com.google.common.util.concurrent.RateLimiter) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) NoAuthException(org.apache.zookeeper.KeeperException.NoAuthException) TException(org.apache.thrift.TException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) ExecutionException(java.util.concurrent.ExecutionException) TTransportException(org.apache.thrift.transport.TTransportException) KeeperException(org.apache.zookeeper.KeeperException) ThriftTableOperationException(org.apache.accumulo.core.clientImpl.thrift.ThriftTableOperationException) TServerConnection(org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ExecutorService(java.util.concurrent.ExecutorService) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus)

Example 5 with TabletServerId

use of org.apache.accumulo.core.spi.balancer.data.TabletServerId in project accumulo by apache.

the class BaseHostRegexTableLoadBalancerTest method createCurrent.

/**
 * Builds a sorted map of {@code numTservers} tablet servers with addresses
 * {@code 192.168.0.1 .. 192.168.0.<numTservers>} on port 9997. Each server starts with empty
 * statistics for the FOO, BAR and BAZ tables.
 *
 * <p>For every entry of {@code initialTableLocation} whose server is in the map, the matching
 * table's online tablet count on that server is then set to 5.
 *
 * @param numTservers number of tablet servers to create
 * @return the populated server-to-status map
 */
protected SortedMap<TabletServerId, TServerStatus> createCurrent(int numTservers) {
    final String addressPrefix = "192.168.0.";
    final TreeMap<TabletServerId, TServerStatus> servers = new TreeMap<>();
    for (int host = 1; host <= numTservers; host++) {
        final Map<String, TableStatistics> statsByTable = new HashMap<>();
        statsByTable.put(FOO.getId().canonical(), new TableStatisticsImpl(new TableInfo()));
        statsByTable.put(BAR.getId().canonical(), new TableStatisticsImpl(new TableInfo()));
        statsByTable.put(BAZ.getId().canonical(), new TableStatisticsImpl(new TableInfo()));
        final TServerStatusImpl serverStatus = new TServerStatusImpl(new org.apache.accumulo.core.master.thrift.TabletServerStatus());
        serverStatus.setTableMap(statsByTable);
        final TabletServerId serverId = new TabletServerIdImpl(addressPrefix + host, 9997, Integer.toHexString(1));
        servers.put(serverId, serverStatus);
    }
    // Record 5 online tablets for each table on the server named as its initial location.
    for (Map.Entry<String, TabletServerId> placement : initialTableLocation.entrySet()) {
        final TServerStatus hostStatus = servers.get(placement.getValue());
        if (hostStatus == null) {
            // The initial location is not one of the servers created above.
            continue;
        }
        final TableId tableId = environment.getTableIdMap().get(placement.getKey());
        final TableStatisticsImpl tableStats = (TableStatisticsImpl) hostStatus.getTableMap().get(tableId.canonical());
        tableStats.setOnlineTabletCount(5);
    }
    return servers;
}
Also used : TableId(org.apache.accumulo.core.data.TableId) HashMap(java.util.HashMap) TServerStatus(org.apache.accumulo.core.spi.balancer.data.TServerStatus) TreeMap(java.util.TreeMap) TabletServerIdImpl(org.apache.accumulo.core.manager.balancer.TabletServerIdImpl) TServerStatusImpl(org.apache.accumulo.core.manager.balancer.TServerStatusImpl) TableStatisticsImpl(org.apache.accumulo.core.manager.balancer.TableStatisticsImpl) TabletServerId(org.apache.accumulo.core.spi.balancer.data.TabletServerId) TableStatistics(org.apache.accumulo.core.spi.balancer.data.TableStatistics) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Map(java.util.Map) SortedMap(java.util.SortedMap)

Aggregations

TabletServerId (org.apache.accumulo.core.spi.balancer.data.TabletServerId)26 TabletId (org.apache.accumulo.core.data.TabletId)22 HashMap (java.util.HashMap)17 ArrayList (java.util.ArrayList)14 TabletMigration (org.apache.accumulo.core.spi.balancer.data.TabletMigration)11 AssignmentParamsImpl (org.apache.accumulo.core.manager.balancer.AssignmentParamsImpl)10 TabletServerIdImpl (org.apache.accumulo.core.manager.balancer.TabletServerIdImpl)10 TServerStatus (org.apache.accumulo.core.spi.balancer.data.TServerStatus)10 Test (org.junit.jupiter.api.Test)10 TableId (org.apache.accumulo.core.data.TableId)8 TreeMap (java.util.TreeMap)7 BalanceParamsImpl (org.apache.accumulo.core.manager.balancer.BalanceParamsImpl)7 Map (java.util.Map)6 SortedMap (java.util.SortedMap)5 TServerStatusImpl (org.apache.accumulo.core.manager.balancer.TServerStatusImpl)4 TabletStatistics (org.apache.accumulo.core.spi.balancer.data.TabletStatistics)4 KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent)3 TableInfo (org.apache.accumulo.core.master.thrift.TableInfo)3 TableStatistics (org.apache.accumulo.core.spi.balancer.data.TableStatistics)3 Collections (java.util.Collections)2