use of org.apache.accumulo.core.master.thrift.TableInfo in project accumulo by apache.
the class BalanceAfterCommsFailureIT method checkBalance.
/**
 * Waits for all tablets to be assigned and then asserts that online tablets are spread evenly
 * across the tablet servers.
 *
 * <p>
 * The master statistics are polled up to 10 times; after every poll that still reports unassigned
 * tablets the method sleeps for 3 seconds. Once polling ends, the method asserts that no tablet is
 * unassigned, that at least two tablet servers reported, and that the online-tablet count of every
 * server differs from the count of the first server by no more than the number of servers.
 *
 * @param c
 *          connector whose instance is used to build the client context for talking to the master
 * @throws Exception
 *           if fetching the master statistics fails or the wait is interrupted
 */
private void checkBalance(Connector c) throws Exception {
  Credentials creds = new Credentials("root", new PasswordToken(ROOT_PASSWORD));
  ClientContext context = new ClientContext(c.getInstance(), creds, getClientConfig());
  MasterMonitorInfo stats = null;
  int unassignedTablets = 1;
  for (int i = 0; unassignedTablets > 0 && i < 10; i++) {
    while (true) {
      // Scoped to a single attempt: if a retry fails before it obtains a new connection, the
      // finally block must not close the client that the previous attempt already released.
      MasterClientService.Iface client = null;
      try {
        client = MasterClient.getConnectionWithRetry(context);
        stats = client.getMasterStats(Tracer.traceInfo(), context.rpcCreds());
        break;
      } catch (ThriftNotActiveServiceException e) {
        // Let it loop, fetching a new location
        log.debug("Contacted a Master which is no longer active, retrying");
        sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
      } finally {
        if (client != null) {
          MasterClient.close(client);
        }
      }
    }
    unassignedTablets = stats.getUnassignedTablets();
    if (unassignedTablets > 0) {
      log.info("Found {} unassigned tablets, sleeping 3 seconds for tablet assignment", unassignedTablets);
      Thread.sleep(3000);
    }
  }
  assertEquals("Unassigned tablets were not assigned within 30 seconds", 0, unassignedTablets);

  // One entry per tablet server: the sum of online tablets over all of its tables.
  List<Integer> counts = new ArrayList<>();
  for (TabletServerStatus server : stats.tServerInfo) {
    int count = 0;
    for (TableInfo table : server.tableMap.values()) {
      count += table.onlineTablets;
    }
    counts.add(count);
  }
  assertTrue("Expected to have at least two TabletServers", counts.size() > 1);

  // Every server is compared against the first one; the tolerated spread is the server count.
  for (int i = 1; i < counts.size(); i++) {
    int diff = Math.abs(counts.get(0) - counts.get(i));
    assertTrue("Expected difference in tablets to be less than or equal to " + counts.size() + " but was " + diff + ". Counts " + counts, diff <= counts.size());
  }
}
use of org.apache.accumulo.core.master.thrift.TableInfo in project accumulo by apache.
the class BalanceInPresenceOfOfflineTableIT method test.
/**
 * Verifies that balancing still happens while the test is running against a cluster that has an
 * offline table with outstanding migrations (as stated by the test's own log message).
 *
 * <p>
 * The test ingests 200000 rows into {@code TEST_TABLE}, flushes and verifies them, and then polls
 * the master statistics with a doubling sleep (starting at 10 seconds) for up to roughly 5 minutes
 * and 15 seconds. Balancing is considered successful once a single poll shows at least two tablet
 * servers, no unassigned tablets, more than 10 tablets on the first server, and a min/max
 * tablets-per-server ratio of at least 0.5.
 *
 * @throws Exception
 *           if ingest, verification, or fetching the master statistics fails
 */
@Test
public void test() throws Exception {
  log.info("Test that balancing is not stopped by an offline table with outstanding migrations.");
  log.debug("starting test ingestion");
  TestIngest.Opts opts = new TestIngest.Opts();
  VerifyIngest.Opts vopts = new VerifyIngest.Opts();
  ClientConfiguration conf = cluster.getClientConfig();
  if (conf.hasSasl()) {
    opts.updateKerberosCredentials(cluster.getClientConfig());
    vopts.updateKerberosCredentials(cluster.getClientConfig());
  } else {
    opts.setPrincipal("root");
    vopts.setPrincipal("root");
  }
  vopts.rows = opts.rows = 200000;
  opts.setTableName(TEST_TABLE);
  TestIngest.ingest(connector, opts, new BatchWriterOpts());
  connector.tableOperations().flush(TEST_TABLE, null, null, true);
  vopts.setTableName(TEST_TABLE);
  VerifyIngest.verifyIngest(connector, vopts, new ScannerOpts());

  log.debug("waiting for balancing, up to ~5 minutes to allow for migration cleanup.");
  final long startTime = System.currentTimeMillis();
  long currentWait = 10 * 1000;
  boolean balancingWorked = false;
  Credentials creds = new Credentials(getAdminPrincipal(), getAdminToken());
  while (!balancingWorked && (System.currentTimeMillis() - startTime) < ((5 * 60 + 15) * 1000)) {
    Thread.sleep(currentWait);
    // Doubled before the checks below, so the "sleeping for {}ms" messages report the next sleep.
    currentWait *= 2;

    log.debug("fetch the list of tablets assigned to each tserver.");
    MasterMonitorInfo stats = null;
    Instance instance = new ZooKeeperInstance(cluster.getClientConfig());
    while (true) {
      // Scoped to a single attempt: if a retry fails before it obtains a new connection, the
      // finally block must not close the client that the previous attempt already released.
      MasterClientService.Iface client = null;
      try {
        client = MasterClient.getConnectionWithRetry(new ClientContext(instance, creds, cluster.getClientConfig()));
        stats = client.getMasterStats(Tracer.traceInfo(), creds.toThrift(instance));
        break;
      } catch (ThriftSecurityException exception) {
        throw new AccumuloSecurityException(exception);
      } catch (ThriftNotActiveServiceException e) {
        // Let it loop, fetching a new location
        log.debug("Contacted a Master which is no longer active, retrying");
        sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
      } catch (TException exception) {
        throw new AccumuloException(exception);
      } finally {
        if (client != null) {
          MasterClient.close(client);
        }
      }
    }

    if (stats.getTServerInfoSize() < 2) {
      log.debug("we need >= 2 servers. sleeping for {}ms", currentWait);
      continue;
    }
    if (stats.getUnassignedTablets() != 0) {
      log.debug("We shouldn't have unassigned tablets. sleeping for {}ms", currentWait);
      continue;
    }

    // A freshly allocated long[] is already zero-filled, so no explicit fill is needed.
    long[] tabletsPerServer = new long[stats.getTServerInfoSize()];
    for (int i = 0; i < stats.getTServerInfoSize(); i++) {
      for (Map.Entry<String, TableInfo> entry : stats.getTServerInfo().get(i).getTableMap().entrySet()) {
        tabletsPerServer[i] += entry.getValue().getTablets();
      }
    }

    // Only the first server's count is inspected by this check.
    if (tabletsPerServer[0] <= 10) {
      log.debug("We should have > 10 tablets. sleeping for {}ms", currentWait);
      continue;
    }
    long min = NumberUtils.min(tabletsPerServer), max = NumberUtils.max(tabletsPerServer);
    log.debug("Min={}, Max={}", min, max);
    if ((min / ((double) max)) < 0.5) {
      log.debug("ratio of min to max tablets per server should be roughly even. sleeping for {}ms", currentWait);
      continue;
    }
    balancingWorked = true;
  }
  Assert.assertTrue("did not properly balance", balancingWorked);
}
use of org.apache.accumulo.core.master.thrift.TableInfo in project accumulo by apache.
the class Master method getMasterMonitorInfo.
/**
 * Assembles a fresh {@link MasterMonitorInfo} from the master's current view of the cluster.
 *
 * <p>
 * The returned object carries: every known tablet server status, a per-table aggregate built with
 * {@code TableInfoUtil.add}, the servers in {@code badServers} (each reported as
 * {@code UNRESPONSIVE}), the master state and goal state, the unassigned tablet count, the servers
 * queued for shutdown, the dead tablet server list and the bulk import status.
 *
 * @return a newly created monitor info object; never shared between calls
 */
public MasterMonitorInfo getMasterMonitorInfo() {
  final MasterMonitorInfo info = new MasterMonitorInfo();

  // Per-server statuses plus a roll-up of each table across all servers.
  // NOTE(review): presumably DefaultMap supplies an entry for table ids not seen yet - confirm.
  info.tServerInfo = new ArrayList<>();
  info.tableMap = new DefaultMap<>(new TableInfo());
  for (TabletServerStatus serverStatus : tserverStatus.values()) {
    info.tServerInfo.add(serverStatus);
    for (Entry<String, TableInfo> perTable : serverStatus.tableMap.entrySet()) {
      TableInfo aggregate = info.tableMap.get(perTable.getKey());
      TableInfoUtil.add(aggregate, perTable.getValue());
    }
  }

  // Bad servers are all reported with the UNRESPONSIVE state id.
  info.badTServers = new HashMap<>();
  synchronized (badServers) {
    for (TServerInstance unresponsive : badServers.keySet()) {
      info.badTServers.put(unresponsive.hostPort(), TabletServerState.UNRESPONSIVE.getId());
    }
  }

  info.state = getMasterState();
  info.goalState = getMasterGoalState();
  info.unassignedTablets = displayUnassigned();

  info.serversShuttingDown = new HashSet<>();
  synchronized (serversToShutdown) {
    for (TServerInstance stopping : serversToShutdown) {
      info.serversShuttingDown.add(stopping.hostPort());
    }
  }

  String deadServersPath = ZooUtil.getRoot(getInstance()) + Constants.ZDEADTSERVERS;
  DeadServerList deadServers = new DeadServerList(deadServersPath);
  info.deadTabletServers = deadServers.getList();

  info.bulkImports = bulkImportStatus.getBulkLoadStatus();
  return info;
}
use of org.apache.accumulo.core.master.thrift.TableInfo in project accumulo by apache.
the class BaseHostRegexTableLoadBalancerTest method createCurrent.
/**
 * Builds the "current" tablet server view used by the balancer tests.
 *
 * <p>
 * Creates {@code numTservers} servers addressed {@code 192.168.0.1:9997},
 * {@code 192.168.0.2:9997}, ... each with an empty {@link TableInfo} for the FOO, BAR and BAZ
 * tables. Afterwards, for every entry of {@code initialTableLocation} whose server is part of the
 * created set, that table's online tablet count on that server is set to 5.
 *
 * @param numTservers
 *          number of tablet servers to create
 * @return the created servers mapped to their status, sorted by server instance
 */
protected SortedMap<TServerInstance, TabletServerStatus> createCurrent(int numTservers) {
  final String addressPrefix = "192.168.0.";
  TreeMap<TServerInstance, TabletServerStatus> servers = new TreeMap<>();

  for (int hostNumber = 1; hostNumber <= numTservers; hostNumber++) {
    Map<String, TableInfo> emptyTables = new HashMap<>();
    emptyTables.put(FOO.getId().canonicalID(), new TableInfo());
    emptyTables.put(BAR.getId().canonicalID(), new TableInfo());
    emptyTables.put(BAZ.getId().canonicalID(), new TableInfo());

    TabletServerStatus serverStatus = new TabletServerStatus();
    serverStatus.setTableMap(emptyTables);

    String hostAndPort = addressPrefix + hostNumber + ":9997";
    servers.put(new TServerInstance(hostAndPort, 1), serverStatus);
  }

  // now put all of the tablets on one server
  for (Map.Entry<String, TServerInstance> location : initialTableLocation.entrySet()) {
    TabletServerStatus hostingStatus = servers.get(location.getValue());
    if (hostingStatus == null) {
      // The configured initial location is not among the servers created above.
      continue;
    }
    String tableId = getTableOperations().tableIdMap().get(location.getKey());
    TableInfo tableInfo = hostingStatus.getTableMap().get(tableId);
    tableInfo.setOnlineTablets(5);
  }
  return servers;
}
use of org.apache.accumulo.core.master.thrift.TableInfo in project accumulo by apache.
the class ChaoticLoadBalancer method balance.
/**
 * Proposes random tablet migrations between the given tablet servers.
 *
 * <p>
 * If any migrations are still outstanding, nothing is proposed; the outstanding migrations are
 * reported through {@code constraintNotMet} instead. Otherwise every online tablet of every table
 * (the metadata table only when a 1-in-4 random draw allows it) is offered to a randomly picked
 * server from the "under capacity" list, and a migration is emitted unless the pick is the server
 * that already hosts the tablet.
 *
 * @param current
 *          tablet servers and their last reported status
 * @param migrations
 *          migrations that are still in progress
 * @param migrationsOut
 *          receives the migrations proposed by this call
 * @return always 100
 */
@Override
public long balance(SortedMap<TServerInstance, TabletServerStatus> current, Set<KeyExtent> migrations, List<TabletMigration> migrationsOut) {
  if (!migrations.isEmpty()) {
    outstandingMigrations.migrations = migrations;
    constraintNotMet(outstandingMigrations);
    return 100;
  }
  resetBalancerErrors();

  Map<TServerInstance, Long> tabletsPerServer = new HashMap<>();
  List<TServerInstance> candidates = new ArrayList<>();
  boolean moveMetadata = r.nextInt(4) == 0;

  // Seed the bookkeeping: every server starts out as a candidate destination.
  long totalTablets = 0;
  for (Entry<TServerInstance, TabletServerStatus> server : current.entrySet()) {
    long hosted = 0;
    for (TableInfo info : server.getValue().getTableMap().values()) {
      hosted += info.tablets;
    }
    tabletsPerServer.put(server.getKey(), hosted);
    candidates.add(server.getKey());
    totalTablets += hosted;
  }

  // totalTablets is fuzzy due to asynchronicity of the stats
  // *1.2 to handle fuzziness, and prevent locking for 'perfect' balancing scenarios
  double paddedAverage = ((double) totalTablets) / current.size() * 1.2;
  long avg = (long) Math.ceil(paddedAverage);

  for (Entry<TServerInstance, TabletServerStatus> server : current.entrySet()) {
    TServerInstance source = server.getKey();
    for (String tableId : server.getValue().getTableMap().keySet()) {
      Table.ID id = Table.ID.of(tableId);
      if (!moveMetadata && MetadataTable.ID.equals(id)) {
        continue;
      }
      try {
        for (TabletStats tablet : getOnlineTabletsForTable(source, id)) {
          KeyExtent extent = new KeyExtent(tablet.extent);
          int pick = r.nextInt(candidates.size());
          TServerInstance dest = candidates.get(pick);
          if (dest.equals(source)) {
            continue;
          }
          migrationsOut.add(new TabletMigration(extent, source, dest));

          // Map.put hands back the count from before the update; that is what gets compared.
          long destBefore = tabletsPerServer.put(dest, tabletsPerServer.get(dest) + 1);
          if (destBefore > avg) {
            candidates.remove(pick);
          }
          long sourceBefore = tabletsPerServer.put(source, tabletsPerServer.get(source) - 1);
          if (sourceBefore <= avg && !candidates.contains(source)) {
            candidates.add(source);
          }

          // We can get some craziness with only 1 tserver, so lets make sure there's always an option!
          if (candidates.isEmpty()) {
            candidates.addAll(tabletsPerServer.keySet());
          }
        }
      } catch (ThriftSecurityException securityFailure) {
        // Shouldn't happen, but carry on if it does
        log.debug("Encountered ThriftSecurityException. This should not happen. Carrying on anyway.", securityFailure);
      } catch (TException thriftFailure) {
        // Shouldn't happen, but carry on if it does
        log.debug("Encountered TException. This should not happen. Carrying on anyway.", thriftFailure);
      }
    }
  }
  return 100;
}
Aggregations