use of org.apache.accumulo.core.manager.balancer.TabletServerIdImpl in project accumulo by apache.
the class Manager method gatherTableInformation.
/**
 * Requests the status of every server in {@code currentServers} on a thread pool and returns an
 * immutable snapshot of the statuses that were collected.
 *
 * <p>
 * One task is submitted per server. Each task fetches the status through {@code tserverSet} and
 * stores it in a concurrent map. A task that fails increments the server's counter in
 * {@code badServers}; once that counter exceeds {@code MAX_BAD_STATUS_COUNT} a halt of the server
 * is attempted (subject to a rate limiter) and the counter is discarded. After all tasks are
 * submitted the method waits up to {@code max(10000, rpcTimeout / 3)} milliseconds for them, then
 * calls {@code shutdownNow()} on the pool.
 *
 * @param currentServers the servers to query; also used to prune {@code badServers}
 * @param balancerMap output map that receives one converted entry per element of
 *        {@code tserverStatus}
 * @return an immutable sorted copy of the statuses gathered during this call
 */
private SortedMap<TServerInstance, TabletServerStatus> gatherTableInformation(Set<TServerInstance> currentServers, SortedMap<TabletServerId, TServerStatus> balancerMap) {
  final long rpcTimeout = getConfiguration().getTimeInMillis(Property.GENERAL_RPC_TIMEOUT);
  int threads = getConfiguration().getCount(Property.MANAGER_STATUS_THREAD_POOL_SIZE);
  ExecutorService tp = ThreadPools.createExecutorService(getConfiguration(), Property.MANAGER_STATUS_THREAD_POOL_SIZE, false);
  long start = System.currentTimeMillis();
  // Written concurrently by the status tasks, hence a concurrent sorted map.
  final SortedMap<TServerInstance, TabletServerStatus> result = new ConcurrentSkipListMap<>();
  final RateLimiter shutdownServerRateLimiter = RateLimiter.create(MAX_SHUTDOWNS_PER_SEC);
  for (TServerInstance serverInstance : currentServers) {
    final TServerInstance server = serverInstance;
    if (threads == 0) {
      // Since an unbounded thread pool is being used, rate limit how fast task are added to the
      // executor. This prevents the threads from growing large unless there are lots of
      // unresponsive tservers.
      sleepUninterruptibly(Math.max(1, rpcTimeout / 120_000), TimeUnit.MILLISECONDS);
    }
    tp.submit(() -> {
      try {
        Thread t = Thread.currentThread();
        String oldName = t.getName();
        try {
          // Rename the worker thread while it is talking to this server; restored in finally.
          String message = "Getting status from " + server;
          t.setName(message);
          long startForServer = System.currentTimeMillis();
          log.trace(message);
          TServerConnection connection1 = tserverSet.getConnection(server);
          if (connection1 == null) {
            throw new IOException("No connection to " + server);
          }
          TabletServerStatus status = connection1.getTableMap(false);
          result.put(server, status);
          long duration = System.currentTimeMillis() - startForServer;
          log.trace("Got status from {} in {} ms", server, duration);
        } finally {
          t.setName(oldName);
        }
      } catch (Exception ex) {
        log.error("unable to get tablet server status {} {}", server, ex.toString());
        log.debug("unable to get tablet server status {}", server, ex);
        // Only attempt the halt when the rate limiter grants a permit. Whether or not the halt
        // is attempted, the failure counter is removed below, so the server must fail more than
        // MAX_BAD_STATUS_COUNT times again before another halt is considered.
        if (badServers.computeIfAbsent(server, k -> new AtomicInteger(0)).incrementAndGet() > MAX_BAD_STATUS_COUNT) {
          if (shutdownServerRateLimiter.tryAcquire()) {
            log.warn("attempting to stop {}", server);
            try {
              TServerConnection connection2 = tserverSet.getConnection(server);
              if (connection2 != null) {
                connection2.halt(managerLock);
              }
            } catch (TTransportException e1) {
              // ignore: it's probably down
            } catch (Exception e2) {
              log.info("error talking to troublesome tablet server", e2);
            }
          } else {
            log.warn("Unable to shutdown {} as over the shutdown limit of {} per minute", server, MAX_SHUTDOWNS_PER_SEC * 60);
          }
          badServers.remove(server);
        }
      }
    });
  }
  tp.shutdown();
  try {
    tp.awaitTermination(Math.max(10000, rpcTimeout / 3), TimeUnit.MILLISECONDS);
  } catch (InterruptedException e) {
    log.debug("Interrupted while fetching status");
    // Restore the interrupt flag so callers can still observe the interruption.
    Thread.currentThread().interrupt();
  }
  tp.shutdownNow();
  // Threads may still modify map after shutdownNow is called, so create an immutable snapshot.
  SortedMap<TServerInstance, TabletServerStatus> info = ImmutableSortedMap.copyOf(result);
  // NOTE(review): this iterates tserverStatus, which is not assigned in this method, rather than
  // the info snapshot built just above - confirm that is intended.
  tserverStatus.forEach((tsi, status) -> balancerMap.put(new TabletServerIdImpl(tsi), TServerStatusImpl.fromThrift(status)));
  synchronized (badServers) {
    // Forget failure counts for servers that are no longer current or that answered this time.
    badServers.keySet().retainAll(currentServers);
    badServers.keySet().removeAll(info.keySet());
  }
  log.debug(String.format("Finished gathering information from %d of %d servers in %.2f seconds", info.size(), currentServers.size(), (System.currentTimeMillis() - start) / 1000.));
  return info;
}
use of org.apache.accumulo.core.manager.balancer.TabletServerIdImpl in project accumulo by apache.
the class BaseHostRegexTableLoadBalancerTest method createCurrent.
/**
 * Builds a sorted map of tablet server ids to statuses for use by the tests.
 *
 * <p>
 * Creates {@code numTservers} entries whose hosts are {@code 192.168.0.1} through
 * {@code 192.168.0.<numTservers>} on port 9997. Every status gets a table map holding statistics
 * built from a fresh {@code TableInfo} for FOO, BAR and BAZ. Afterwards, for each entry of
 * {@code initialTableLocation} whose server is present in the map, the online tablet count of
 * that table on that server is set to 5.
 *
 * @param numTservers number of server entries to create
 * @return the populated map
 */
protected SortedMap<TabletServerId, TServerStatus> createCurrent(int numTservers) {
  final String addressPrefix = "192.168.0.";
  final TreeMap<TabletServerId, TServerStatus> statusByServer = new TreeMap<>();
  int serverNumber = 1;
  while (serverNumber <= numTservers) {
    TServerStatusImpl serverStatus = new TServerStatusImpl(new org.apache.accumulo.core.master.thrift.TabletServerStatus());
    Map<String, TableStatistics> statsByTable = new HashMap<>();
    statsByTable.put(FOO.getId().canonical(), new TableStatisticsImpl(new TableInfo()));
    statsByTable.put(BAR.getId().canonical(), new TableStatisticsImpl(new TableInfo()));
    statsByTable.put(BAZ.getId().canonical(), new TableStatisticsImpl(new TableInfo()));
    serverStatus.setTableMap(statsByTable);
    TabletServerId serverId = new TabletServerIdImpl(addressPrefix + serverNumber, 9997, Integer.toHexString(1));
    statusByServer.put(serverId, serverStatus);
    serverNumber++;
  }
  // now put all of the tablets on one server
  for (Map.Entry<String, TabletServerId> location : initialTableLocation.entrySet()) {
    TServerStatus hostStatus = statusByServer.get(location.getValue());
    if (hostStatus == null) {
      // The initial location names a server that was not created above.
      continue;
    }
    TableId id = environment.getTableIdMap().get(location.getKey());
    TableStatisticsImpl stats = (TableStatisticsImpl) hostStatus.getTableMap().get(id.canonical());
    stats.setOnlineTabletCount(5);
  }
  return statusByServer;
}
use of org.apache.accumulo.core.manager.balancer.TabletServerIdImpl in project accumulo by apache.
the class SimpleLoadBalancerTest method testAssignMigrations.
/**
 * Assigns nine tablets from three tables to three fake servers, checks the resulting per-table
 * counts, verifies that no migrations are proposed afterwards, then removes one server and
 * reassigns its tablets to the remaining servers.
 */
@Test
public void testAssignMigrations() {
  servers.put(new TabletServerIdImpl("127.0.0.1", 1234, "a"), new FakeTServer());
  servers.put(new TabletServerIdImpl("127.0.0.2", 1234, "b"), new FakeTServer());
  servers.put(new TabletServerIdImpl("127.0.0.3", 1234, "c"), new FakeTServer());
  // One tablet for t1, two for t2 and six for t3.
  List<TabletId> metadataTable = new ArrayList<>();
  String table = "t1";
  metadataTable.add(makeTablet(table, null, null));
  table = "t2";
  metadataTable.add(makeTablet(table, "a", null));
  metadataTable.add(makeTablet(table, null, "a"));
  table = "t3";
  metadataTable.add(makeTablet(table, "a", null));
  metadataTable.add(makeTablet(table, "b", "a"));
  metadataTable.add(makeTablet(table, "c", "b"));
  metadataTable.add(makeTablet(table, "d", "c"));
  metadataTable.add(makeTablet(table, "e", "d"));
  metadataTable.add(makeTablet(table, null, "e"));
  Collections.sort(metadataTable);
  TestSimpleLoadBalancer balancer = new TestSimpleLoadBalancer();
  SortedMap<TabletServerId, TServerStatus> current = new TreeMap<>();
  for (Entry<TabletServerId, FakeTServer> entry : servers.entrySet()) {
    current.put(entry.getKey(), entry.getValue().getStatus());
  }
  assignTablets(metadataTable, servers, current, balancer);
  // Verify that the counts on the tables are correct
  Map<String, Integer> expectedCounts = new HashMap<>();
  expectedCounts.put("t1", 1);
  expectedCounts.put("t2", 1);
  expectedCounts.put("t3", 2);
  checkBalance(metadataTable, servers, expectedCounts);
  // Rebalance once: refresh the status snapshot from the servers before asking for migrations
  for (Entry<TabletServerId, FakeTServer> entry : servers.entrySet()) {
    current.put(entry.getKey(), entry.getValue().getStatus());
  }
  // Nothing should happen, we are balanced
  ArrayList<TabletMigration> out = new ArrayList<>();
  balancer.getMigrations(current, out);
  // Expected value goes first so a failure message reports the values the right way round.
  assertEquals(0, out.size());
  // Take down a tabletServer
  TabletServerId first = current.keySet().iterator().next();
  current.remove(first);
  FakeTServer remove = servers.remove(first);
  // reassign offline extents
  assignTablets(remove.tablets, servers, current, balancer);
  checkBalance(metadataTable, servers, null);
}
use of org.apache.accumulo.core.manager.balancer.TabletServerIdImpl in project accumulo by apache.
the class ChaoticLoadBalancerTest method testAssignMigrations.
/**
 * Asks the chaotic balancer to assign nine unassigned tablets across three fake servers and
 * checks that it returns one assignment per tablet.
 */
@Test
public void testAssignMigrations() {
  servers.clear();
  // Same host, three different ports.
  servers.put(new TabletServerIdImpl("127.0.0.1", 1234, "a"), new FakeTServer());
  servers.put(new TabletServerIdImpl("127.0.0.1", 1235, "b"), new FakeTServer());
  servers.put(new TabletServerIdImpl("127.0.0.1", 1236, "c"), new FakeTServer());
  // Every tablet maps to null, i.e. no server is recorded for it yet.
  Map<TabletId, TabletServerId> metadataTable = new TreeMap<>();
  String table = "t1";
  metadataTable.put(makeTablet(table, null, null), null);
  table = "t2";
  metadataTable.put(makeTablet(table, "a", null), null);
  metadataTable.put(makeTablet(table, null, "a"), null);
  table = "t3";
  metadataTable.put(makeTablet(table, "a", null), null);
  metadataTable.put(makeTablet(table, "b", "a"), null);
  metadataTable.put(makeTablet(table, "c", "b"), null);
  metadataTable.put(makeTablet(table, "d", "c"), null);
  metadataTable.put(makeTablet(table, "e", "d"), null);
  metadataTable.put(makeTablet(table, null, "e"), null);
  TestChaoticLoadBalancer balancer = new TestChaoticLoadBalancer();
  Map<TabletId, TabletServerId> assignments = new HashMap<>();
  balancer.getAssignments(new AssignmentParamsImpl(getAssignments(servers), metadataTable, assignments));
  // Expected value (the number of tablets) goes first, the observed assignment count second.
  assertEquals(metadataTable.size(), assignments.size());
}
use of org.apache.accumulo.core.manager.balancer.TabletServerIdImpl in project accumulo by apache.
the class ChaoticLoadBalancerTest method testUnevenAssignment.
/**
 * Gives each of 26 fake servers one tablet, piles nine extra tablets onto the first server, and
 * then runs the chaotic balancer until it proposes at least one migration.
 */
@Test
public void testUnevenAssignment() {
  servers.clear();
  for (char c : "abcdefghijklmnopqrstuvwxyz".toCharArray()) {
    String cString = Character.toString(c);
    // The char is widened to int and used as the port, giving every server a distinct port.
    TabletServerId tsi = new TabletServerIdImpl("127.0.0.1", c, cString);
    FakeTServer fakeTServer = new FakeTServer();
    servers.put(tsi, fakeTServer);
    fakeTServer.tablets.add(makeTablet(cString, null, null));
  }
  // Put more tablets on one server, but not more than the number of servers
  Entry<TabletServerId, FakeTServer> first = servers.entrySet().iterator().next();
  first.getValue().tablets.add(makeTablet("newTable", "a", null));
  first.getValue().tablets.add(makeTablet("newTable", "b", "a"));
  first.getValue().tablets.add(makeTablet("newTable", "c", "b"));
  first.getValue().tablets.add(makeTablet("newTable", "d", "c"));
  first.getValue().tablets.add(makeTablet("newTable", "e", "d"));
  first.getValue().tablets.add(makeTablet("newTable", "f", "e"));
  first.getValue().tablets.add(makeTablet("newTable", "g", "f"));
  first.getValue().tablets.add(makeTablet("newTable", "h", "g"));
  first.getValue().tablets.add(makeTablet("newTable", "i", null));
  TestChaoticLoadBalancer balancer = new TestChaoticLoadBalancer();
  Set<TabletId> migrations = Collections.emptySet();
  // Just want to make sure it gets some migrations, randomness prevents guarantee of a defined
  // amount, or even expected amount
  List<TabletMigration> migrationsOut = new ArrayList<>();
  // Keep balancing while nothing has been proposed. The list starts out empty, so the previous
  // negated condition never entered the loop and the balancer was never exercised.
  while (migrationsOut.isEmpty()) {
    balancer.balance(new BalanceParamsImpl(getAssignments(servers), migrations, migrationsOut));
  }
}
Aggregations