use of org.apache.accumulo.core.spi.balancer.data.TServerStatus in project accumulo by apache.
the class Manager method gatherTableInformation.
/**
 * Requests the status of every server in {@code currentServers} on a thread pool and returns an
 * immutable snapshot of the responses collected before the wait expired.
 *
 * <p>A server whose status request throws has its counter in {@code badServers} incremented. Once
 * that counter exceeds {@code MAX_BAD_STATUS_COUNT}, a halt of the server is attempted if the
 * shutdown rate limiter grants a permit, and the counter is removed either way. After the poll,
 * {@code badServers} is pruned to servers that are still current and did not respond.
 *
 * @param currentServers the servers to poll
 * @param balancerMap output map; receives one entry for each server in the returned snapshot,
 *        converted with {@code TabletServerIdImpl} and {@code TServerStatusImpl.fromThrift}
 * @return an immutable sorted map of the statuses gathered by this call
 */
private SortedMap<TServerInstance, TabletServerStatus> gatherTableInformation(Set<TServerInstance> currentServers, SortedMap<TabletServerId, TServerStatus> balancerMap) {
  final long rpcTimeout = getConfiguration().getTimeInMillis(Property.GENERAL_RPC_TIMEOUT);
  int threads = getConfiguration().getCount(Property.MANAGER_STATUS_THREAD_POOL_SIZE);
  ExecutorService tp = ThreadPools.createExecutorService(getConfiguration(), Property.MANAGER_STATUS_THREAD_POOL_SIZE, false);
  long start = System.currentTimeMillis();
  // Written concurrently by the pool threads, hence a concurrent sorted map.
  final SortedMap<TServerInstance, TabletServerStatus> result = new ConcurrentSkipListMap<>();
  final RateLimiter shutdownServerRateLimiter = RateLimiter.create(MAX_SHUTDOWNS_PER_SEC);
  for (TServerInstance serverInstance : currentServers) {
    final TServerInstance server = serverInstance;
    if (threads == 0) {
      // Since an unbounded thread pool is being used, rate limit how fast task are added to the
      // executor. This prevents the threads from growing large unless there are lots of
      // unresponsive tservers.
      sleepUninterruptibly(Math.max(1, rpcTimeout / 120_000), TimeUnit.MILLISECONDS);
    }
    tp.submit(() -> {
      try {
        Thread t = Thread.currentThread();
        String oldName = t.getName();
        try {
          // Rename the worker thread while it talks to this server; restored in the finally.
          String message = "Getting status from " + server;
          t.setName(message);
          long startForServer = System.currentTimeMillis();
          log.trace(message);
          TServerConnection connection1 = tserverSet.getConnection(server);
          if (connection1 == null) {
            throw new IOException("No connection to " + server);
          }
          TabletServerStatus status = connection1.getTableMap(false);
          result.put(server, status);
          long duration = System.currentTimeMillis() - startForServer;
          log.trace("Got status from {} in {} ms", server, duration);
        } finally {
          t.setName(oldName);
        }
      } catch (Exception ex) {
        log.error("unable to get tablet server status {} {}", server, ex.toString());
        log.debug("unable to get tablet server status {}", server, ex);
        // Only try to stop the server once its failure count exceeds MAX_BAD_STATUS_COUNT. The
        // halt itself is attempted only if the rate limiter grants a permit; in either case the
        // counter is removed below, so the server starts counting failures from zero again.
        if (badServers.computeIfAbsent(server, k -> new AtomicInteger(0)).incrementAndGet() > MAX_BAD_STATUS_COUNT) {
          if (shutdownServerRateLimiter.tryAcquire()) {
            log.warn("attempting to stop {}", server);
            try {
              TServerConnection connection2 = tserverSet.getConnection(server);
              if (connection2 != null) {
                connection2.halt(managerLock);
              }
            } catch (TTransportException e1) {
              // ignore: it's probably down
            } catch (Exception e2) {
              log.info("error talking to troublesome tablet server", e2);
            }
          } else {
            log.warn("Unable to shutdown {} as over the shutdown limit of {} per minute", server, MAX_SHUTDOWNS_PER_SEC * 60);
          }
          badServers.remove(server);
        }
      }
    });
  }
  tp.shutdown();
  try {
    // Wait at least 10 seconds, or a third of the RPC timeout if that is longer.
    tp.awaitTermination(Math.max(10000, rpcTimeout / 3), TimeUnit.MILLISECONDS);
  } catch (InterruptedException e) {
    // NOTE(review): the interrupt flag is not restored here; confirm whether that is intended.
    log.debug("Interrupted while fetching status");
  }
  tp.shutdownNow();
  // Threads may still modify map after shutdownNow is called, so create an immutable snapshot.
  SortedMap<TServerInstance, TabletServerStatus> info = ImmutableSortedMap.copyOf(result);
  // Fill the balancer view from the snapshot taken by this call. The tserverStatus field is never
  // assigned in this method, so it cannot reflect the poll that just finished.
  info.forEach((tsi, status) -> balancerMap.put(new TabletServerIdImpl(tsi), TServerStatusImpl.fromThrift(status)));
  synchronized (badServers) {
    // Forget servers that are no longer current, and servers that responded this time.
    badServers.keySet().retainAll(currentServers);
    badServers.keySet().removeAll(info.keySet());
  }
  log.debug(String.format("Finished gathering information from %d of %d servers in %.2f seconds", info.size(), currentServers.size(), (System.currentTimeMillis() - start) / 1000.));
  return info;
}
use of org.apache.accumulo.core.spi.balancer.data.TServerStatus in project accumulo by apache.
the class BaseHostRegexTableLoadBalancerTest method createCurrent.
/**
 * Builds a status map for {@code numTservers} tablet servers with hosts {@code 192.168.0.1}
 * through {@code 192.168.0.<numTservers>}. Each server gets an empty statistics entry for the
 * FOO, BAR and BAZ tables. Then, for every entry of {@code initialTableLocation} whose server is
 * present in the map, that table's online tablet count on the server is set to 5.
 *
 * @param numTservers number of tablet server entries to create
 * @return the populated map, sorted by tablet server id
 */
protected SortedMap<TabletServerId, TServerStatus> createCurrent(int numTservers) {
  final String hostPrefix = "192.168.0.";
  final TreeMap<TabletServerId, TServerStatus> servers = new TreeMap<>();

  int hostNumber = 1;
  while (hostNumber <= numTservers) {
    TServerStatusImpl serverStatus = new TServerStatusImpl(new org.apache.accumulo.core.master.thrift.TabletServerStatus());

    // One fresh statistics object per table, keyed by the canonical table id.
    Map<String, TableStatistics> statsByTable = new HashMap<>();
    statsByTable.put(FOO.getId().canonical(), new TableStatisticsImpl(new TableInfo()));
    statsByTable.put(BAR.getId().canonical(), new TableStatisticsImpl(new TableInfo()));
    statsByTable.put(BAZ.getId().canonical(), new TableStatisticsImpl(new TableInfo()));
    serverStatus.setTableMap(statsByTable);

    TabletServerId serverId = new TabletServerIdImpl(hostPrefix + hostNumber, 9997, Integer.toHexString(1));
    servers.put(serverId, serverStatus);
    hostNumber++;
  }

  // now put all of the tablets on one server
  initialTableLocation.forEach((tableName, location) -> {
    TServerStatus located = servers.get(location);
    if (located == null) {
      // The initial location is not among the generated servers; nothing to update.
      return;
    }
    TableId tableId = environment.getTableIdMap().get(tableName);
    TableStatisticsImpl stats = (TableStatisticsImpl) located.getTableMap().get(tableId.canonical());
    stats.setOnlineTabletCount(5);
  });

  return servers;
}
use of org.apache.accumulo.core.spi.balancer.data.TServerStatus in project accumulo by apache.
the class HostRegexTableLoadBalancerTest method testUnassignedWithNoDefaultPool.
/**
 * Assigns all BAR tablets after removing hosts 192.168.0.6 through 192.168.0.15 from the current
 * servers, then checks that every tablet received an assignment and that none of the assignments
 * is reported as in bounds by {@code tabletInBounds}.
 */
@Test
public void testUnassignedWithNoDefaultPool() {
  init(DEFAULT_TABLE_PROPERTIES);

  // Every BAR tablet starts out unassigned, mapped to null.
  Map<TabletId, TabletServerId> unassigned = new HashMap<>();
  for (TabletId barTablet : tableTablets.get(BAR.getTableName())) {
    unassigned.put(barTablet, null);
  }
  Map<TabletId, TabletServerId> assignments = new HashMap<>();

  // Remove the BAR tablet servers and default pool from current
  List<String> excludedHosts = new ArrayList<>();
  for (int lastOctet = 6; lastOctet <= 15; lastOctet++) {
    excludedHosts.add("192.168.0." + lastOctet);
  }
  SortedMap<TabletServerId, TServerStatus> current = createCurrent(15);
  current.keySet().removeIf(serverId -> excludedHosts.contains(serverId.getHost()));

  AssignmentParamsImpl assignmentParams = new AssignmentParamsImpl(Collections.unmodifiableSortedMap(current), Collections.unmodifiableMap(unassigned), assignments);
  this.getAssignments(assignmentParams);

  assertEquals(unassigned.size(), assignments.size());
  // No assignment may land on a server that tabletInBounds accepts for that tablet.
  assignments.forEach((tablet, assignedServer) -> {
    if (tabletInBounds(tablet, assignedServer)) {
      fail("tablet unexpectedly in bounds: " + tablet + " -> " + assignedServer.getHost());
    }
  });
}
use of org.apache.accumulo.core.spi.balancer.data.TServerStatus in project accumulo by apache.
the class HostRegexTableLoadBalancerTest method testUnassignedWithNoTServers.
/**
 * Assigns all BAR tablets after removing hosts 192.168.0.6 through 192.168.0.10 from the current
 * servers, then checks that every tablet received an assignment and that none of the assignments
 * is reported as in bounds by {@code tabletInBounds}.
 */
@Test
public void testUnassignedWithNoTServers() {
  init(DEFAULT_TABLE_PROPERTIES);

  // Every BAR tablet starts out unassigned, mapped to null.
  Map<TabletId, TabletServerId> unassigned = new HashMap<>();
  for (TabletId barTablet : tableTablets.get(BAR.getTableName())) {
    unassigned.put(barTablet, null);
  }
  Map<TabletId, TabletServerId> assignments = new HashMap<>();

  // Remove the BAR tablet servers from current
  List<String> barHosts = new ArrayList<>();
  for (int lastOctet = 6; lastOctet <= 10; lastOctet++) {
    barHosts.add("192.168.0." + lastOctet);
  }
  SortedMap<TabletServerId, TServerStatus> current = createCurrent(15);
  current.keySet().removeIf(serverId -> barHosts.contains(serverId.getHost()));

  AssignmentParamsImpl assignmentParams = new AssignmentParamsImpl(Collections.unmodifiableSortedMap(current), Collections.unmodifiableMap(unassigned), assignments);
  this.getAssignments(assignmentParams);

  assertEquals(unassigned.size(), assignments.size());
  // Ensure tablets are assigned in default pool
  assignments.forEach((tablet, assignedServer) -> {
    if (tabletInBounds(tablet, assignedServer)) {
      fail("tablet unexpectedly in bounds: " + tablet + " -> " + assignedServer.getHost());
    }
  });
}
use of org.apache.accumulo.core.spi.balancer.data.TServerStatus in project accumulo by apache.
the class ChaoticLoadBalancer method balance.
/**
 * Proposes a migration of each online tablet to a randomly chosen tablet server.
 *
 * <p>If migrations are already outstanding, the problem is reported and nothing is proposed.
 * Otherwise every online tablet of every table on every server is paired with a random
 * destination drawn from a candidate list; tablets whose draw is their own server stay put.
 * Metadata table tablets are only considered when a one-in-four random draw allows it.
 *
 * @param params supplies the current status and migrations, and collects the proposed migrations
 *        in {@code migrationsOut()}
 * @return always 100
 */
@Override
public long balance(BalanceParameters params) {
  if (!params.currentMigrations().isEmpty()) {
    outstandingMigrationsProblem.setMigrations(params.currentMigrations());
    problemReporter.reportProblem(outstandingMigrationsProblem);
    return 100;
  }
  problemReporter.clearProblemReportTimes();

  final boolean moveMetadata = random.nextInt(4) == 0;

  // Tally tablets per server; every server starts out as a candidate destination.
  Map<TabletServerId, Long> tabletsPerServer = new HashMap<>();
  List<TabletServerId> candidates = new ArrayList<>();
  long totalTablets = 0;
  for (Entry<TabletServerId, TServerStatus> serverEntry : params.currentStatus().entrySet()) {
    long serverTotal = 0;
    for (TableStatistics tableStats : serverEntry.getValue().getTableMap().values()) {
      serverTotal += tableStats.getTabletCount();
    }
    tabletsPerServer.put(serverEntry.getKey(), serverTotal);
    candidates.add(serverEntry.getKey());
    totalTablets += serverTotal;
  }

  // totalTablets is fuzzy due to asynchronicity of the stats
  // *1.2 to handle fuzziness, and prevent locking for 'perfect' balancing scenarios
  final double meanPerServer = ((double) totalTablets) / params.currentStatus().size();
  final long avg = (long) Math.ceil(meanPerServer * 1.2);

  for (Entry<TabletServerId, TServerStatus> serverEntry : params.currentStatus().entrySet()) {
    final TabletServerId source = serverEntry.getKey();
    for (String tableName : serverEntry.getValue().getTableMap().keySet()) {
      final TableId tableId = TableId.of(tableName);
      if (!moveMetadata && MetadataTable.ID.equals(tableId)) {
        continue;
      }
      try {
        for (TabletStatistics tablet : getOnlineTabletsForTable(source, tableId)) {
          final int pick = random.nextInt(candidates.size());
          final TabletServerId dest = candidates.get(pick);
          if (dest.equals(source)) {
            continue;
          }
          params.migrationsOut().add(new TabletMigration(tablet.getTabletId(), source, dest));

          // The destination stops being a candidate if it was already above avg before this move.
          final long destBefore = tabletsPerServer.get(dest);
          tabletsPerServer.put(dest, destBefore + 1);
          if (destBefore > avg) {
            candidates.remove(pick);
          }

          // The source becomes a candidate again if it was at or below avg before this move.
          final long sourceBefore = tabletsPerServer.get(source);
          tabletsPerServer.put(source, sourceBefore - 1);
          if (sourceBefore <= avg && !candidates.contains(source)) {
            candidates.add(source);
          }

          // Never leave the candidate list empty: fall back to every known server.
          if (candidates.isEmpty()) {
            candidates.addAll(tabletsPerServer.keySet());
          }
        }
      } catch (AccumuloSecurityException e1) {
        // Shouldn't happen, but carry on if it does
        log.debug("Encountered AccumuloSecurityException. This should not happen. Carrying on anyway.", e1);
      } catch (AccumuloException e1) {
        // Shouldn't happen, but carry on if it does
        log.debug("Encountered AccumuloException. This should not happen. Carrying on anyway.", e1);
      }
    }
  }
  return 100;
}
Aggregations