use of org.apache.accumulo.core.master.thrift.MasterMonitorInfo in project accumulo by apache.
the class BalanceAfterCommsFailureIT method checkBalance.
/**
 * Waits for all tablets to be assigned and then asserts that online tablets are spread evenly
 * across the tablet servers reported by the master.
 *
 * <p>
 * The master is polled at most 10 times; after every poll that still reports unassigned tablets
 * the method sleeps 3 seconds. Once no tablets are unassigned, the online tablet count of each
 * tablet server is compared with that of the first server and must not differ by more than the
 * number of servers.
 *
 * @param c
 *          connector whose instance is used to build the client context for the master RPC
 * @throws Exception
 *           anything propagated from the master RPC, or an interruption of the 3 second sleep
 */
private void checkBalance(Connector c) throws Exception {
  final Credentials rootCreds = new Credentials("root", new PasswordToken(ROOT_PASSWORD));
  final ClientContext ctx = new ClientContext(c.getInstance(), rootCreds, getClientConfig());

  MasterMonitorInfo stats = null;
  int unassignedTablets = 1;
  int polls = 0;
  while (unassignedTablets > 0 && polls < 10) {
    MasterClientService.Iface masterClient = null;
    boolean fetched = false;
    while (!fetched) {
      try {
        masterClient = MasterClient.getConnectionWithRetry(ctx);
        stats = masterClient.getMasterStats(Tracer.traceInfo(), ctx.rpcCreds());
        fetched = true;
      } catch (ThriftNotActiveServiceException e) {
        // The contacted master is not the active one; go around again to look up a new location
        log.debug("Contacted a Master which is no longer active, retrying");
        sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
      } finally {
        // Release the connection after every attempt, successful or not
        if (masterClient != null) {
          MasterClient.close(masterClient);
        }
      }
    }

    unassignedTablets = stats.getUnassignedTablets();
    if (unassignedTablets > 0) {
      log.info("Found {} unassigned tablets, sleeping 3 seconds for tablet assignment", unassignedTablets);
      Thread.sleep(3000);
    }
    polls++;
  }
  assertEquals("Unassigned tablets were not assigned within 30 seconds", 0, unassignedTablets);

  // Total the online tablets hosted by each tablet server, over all of its tables
  List<Integer> counts = new ArrayList<>();
  for (TabletServerStatus tserver : stats.tServerInfo) {
    int onlineOnServer = 0;
    for (TableInfo tableInfo : tserver.tableMap.values()) {
      onlineOnServer += tableInfo.onlineTablets;
    }
    counts.add(onlineOnServer);
  }
  assertTrue("Expected to have at least two TabletServers", counts.size() > 1);

  // Every other server is measured against the first one; the tolerance is the server count
  final int serverCount = counts.size();
  final int firstServerCount = counts.get(0);
  for (int idx = 1; idx < serverCount; idx++) {
    final int diff = Math.abs(firstServerCount - counts.get(idx));
    assertTrue("Expected difference in tablets to be less than or equal to " + serverCount + " but was " + diff + ". Counts " + counts, diff <= serverCount);
  }
}
use of org.apache.accumulo.core.master.thrift.MasterMonitorInfo in project accumulo by apache.
the class BalanceInPresenceOfOfflineTableIT method test.
/**
 * Ingests and verifies 200000 rows in {@code TEST_TABLE}, then polls the master until the
 * tablets look balanced across tablet servers or roughly five minutes have elapsed.
 *
 * <p>
 * A poll counts as balanced when the master reports at least two tablet servers, no unassigned
 * tablets, more than 10 tablets on the first server, and a min/max tablets-per-server ratio of
 * at least 0.5. The pause before each poll starts at 10 seconds and doubles every round.
 */
@Test
public void test() throws Exception {
  log.info("Test that balancing is not stopped by an offline table with outstanding migrations.");
  log.debug("starting test ingestion");

  TestIngest.Opts ingestOpts = new TestIngest.Opts();
  VerifyIngest.Opts verifyOpts = new VerifyIngest.Opts();
  ClientConfiguration conf = cluster.getClientConfig();
  if (conf.hasSasl()) {
    ingestOpts.updateKerberosCredentials(cluster.getClientConfig());
    verifyOpts.updateKerberosCredentials(cluster.getClientConfig());
  } else {
    ingestOpts.setPrincipal("root");
    verifyOpts.setPrincipal("root");
  }
  ingestOpts.rows = 200000;
  verifyOpts.rows = ingestOpts.rows;

  ingestOpts.setTableName(TEST_TABLE);
  TestIngest.ingest(connector, ingestOpts, new BatchWriterOpts());
  connector.tableOperations().flush(TEST_TABLE, null, null, true);
  verifyOpts.setTableName(TEST_TABLE);
  VerifyIngest.verifyIngest(connector, verifyOpts, new ScannerOpts());

  log.debug("waiting for balancing, up to ~5 minutes to allow for migration cleanup.");
  final long startTime = System.currentTimeMillis();
  // 5 minutes plus 15 seconds, in milliseconds
  final long maxElapsedMillis = (5 * 60 + 15) * 1000;
  long currentWait = 10 * 1000;
  boolean balancingWorked = false;
  Credentials creds = new Credentials(getAdminPrincipal(), getAdminToken());

  while (!balancingWorked && (System.currentTimeMillis() - startTime) < maxElapsedMillis) {
    Thread.sleep(currentWait);
    // Exponential back-off: the debug messages below report the wait for the *next* round
    currentWait *= 2;

    log.debug("fetch the list of tablets assigned to each tserver.");
    MasterClientService.Iface masterClient = null;
    MasterMonitorInfo stats = null;
    Instance instance = new ZooKeeperInstance(cluster.getClientConfig());
    boolean fetched = false;
    while (!fetched) {
      try {
        masterClient = MasterClient.getConnectionWithRetry(new ClientContext(instance, creds, cluster.getClientConfig()));
        stats = masterClient.getMasterStats(Tracer.traceInfo(), creds.toThrift(instance));
        fetched = true;
      } catch (ThriftSecurityException exception) {
        throw new AccumuloSecurityException(exception);
      } catch (ThriftNotActiveServiceException e) {
        // The contacted master is not the active one; go around again to look up a new location
        log.debug("Contacted a Master which is no longer active, retrying");
        sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
      } catch (TException exception) {
        throw new AccumuloException(exception);
      } finally {
        // Release the connection after every attempt, successful or not
        if (masterClient != null) {
          MasterClient.close(masterClient);
        }
      }
    }

    final int serverCount = stats.getTServerInfoSize();
    if (serverCount < 2) {
      log.debug("we need >= 2 servers. sleeping for {}ms", currentWait);
      continue;
    }
    if (stats.getUnassignedTablets() != 0) {
      log.debug("We shouldn't have unassigned tablets. sleeping for {}ms", currentWait);
      continue;
    }

    // Sum the tablets of every table, per tablet server
    long[] tabletsPerServer = new long[serverCount];
    Arrays.fill(tabletsPerServer, 0L);
    for (int server = 0; server < serverCount; server++) {
      for (Map.Entry<String, TableInfo> tableEntry : stats.getTServerInfo().get(server).getTableMap().entrySet()) {
        tabletsPerServer[server] += tableEntry.getValue().getTablets();
      }
    }
    if (tabletsPerServer[0] <= 10) {
      log.debug("We should have > 10 tablets. sleeping for {}ms", currentWait);
      continue;
    }

    long min = NumberUtils.min(tabletsPerServer);
    long max = NumberUtils.max(tabletsPerServer);
    log.debug("Min={}, Max={}", min, max);
    final double minToMaxRatio = min / ((double) max);
    if (minToMaxRatio < 0.5) {
      log.debug("ratio of min to max tablets per server should be roughly even. sleeping for {}ms", currentWait);
      continue;
    }
    balancingWorked = true;
  }

  Assert.assertTrue("did not properly balance", balancingWorked);
}
use of org.apache.accumulo.core.master.thrift.MasterMonitorInfo in project accumulo by apache.
the class BulkImportMonitoringIT method test.
/**
 * Checks that running bulk imports become visible in the master's monitoring information.
 *
 * <p>
 * The test starts the monitor, creates a split table, writes 10 directories of 10 RFiles each
 * using a 5-thread pool, submits one {@code importDirectory} call per directory, and then polls
 * {@code getMasterMonitorInfo()} until a bulk import is reported or the pool has terminated.
 */
@Test
public void test() throws Exception {
  getCluster().getClusterControl().start(ServerType.MONITOR);
  final Connector conn = getConnector();
  final String tableName = getUniqueNames(1)[0];
  conn.tableOperations().create(tableName);
  conn.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "1");

  // splits to slow down bulk import
  SortedSet<Text> splits = new TreeSet<>();
  for (int hex = 1; hex < 0xf; hex++) {
    splits.add(new Text(Integer.toHexString(hex)));
  }
  conn.tableOperations().addSplits(tableName, splits);

  // Baseline: a single tablet server and no bulk imports reported anywhere
  MasterMonitorInfo stats = getCluster().getMasterMonitorInfo();
  assertEquals(1, stats.tServerInfo.size());
  assertEquals(0, stats.bulkImports.size());
  assertEquals(0, stats.tServerInfo.get(0).bulkImports.size());

  log.info("Creating lots of bulk import files");
  final FileSystem fs = getCluster().getFileSystem();
  final Path basePath = getCluster().getTemporaryPath();
  CachedConfiguration.setInstance(fs.getConf());

  final Path base = new Path(basePath, "testBulkLoad" + tableName);
  fs.delete(base, true);
  fs.mkdirs(base);

  ExecutorService es = Executors.newFixedThreadPool(5);
  List<Future<Pair<String, String>>> futures = new ArrayList<>();
  for (int batch = 0; batch < 10; batch++) {
    final int which = batch;
    // Each task writes one directory of files and returns (files dir, failures dir)
    Callable<Pair<String, String>> writeBatch = new Callable<Pair<String, String>>() {
      @Override
      public Pair<String, String> call() throws Exception {
        Path bulkFailures = new Path(base, "failures" + which);
        Path files = new Path(base, "files" + which);
        fs.mkdirs(bulkFailures);
        fs.mkdirs(files);
        for (int fileIndex = 0; fileIndex < 10; fileIndex++) {
          String fileName = files.toString() + "/bulk_" + fileIndex + "." + RFile.EXTENSION;
          FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder().forFile(fileName, fs, fs.getConf())
              .withTableConfiguration(DefaultConfiguration.getInstance()).build();
          writer.startDefaultLocalityGroup();
          for (int row = 0x100; row < 0xfff; row += 3) {
            writer.append(new Key(Integer.toHexString(row)), new Value(new byte[0]));
          }
          writer.close();
        }
        return new Pair<>(files.toString(), bulkFailures.toString());
      }
    };
    futures.add(es.submit(writeBatch));
  }

  // Block until every directory has been written
  List<Pair<String, String>> dirs = new ArrayList<>();
  for (Future<Pair<String, String>> written : futures) {
    dirs.add(written.get());
  }

  log.info("Importing");
  long now = System.currentTimeMillis();
  List<Future<Object>> errs = new ArrayList<>();
  for (Pair<String, String> entry : dirs) {
    final String dir = entry.getFirst();
    final String err = entry.getSecond();
    Callable<Object> importBatch = new Callable<Object>() {
      @Override
      public Object call() throws Exception {
        conn.tableOperations().importDirectory(tableName, dir, err, false);
        return null;
      }
    };
    errs.add(es.submit(importBatch));
  }
  es.shutdown();

  // Re-fetch the stats every ~10ms until a bulk import is reported or all imports have finished
  while (!es.isTerminated() && stats.bulkImports.size() + stats.tServerInfo.get(0).bulkImports.size() == 0) {
    es.awaitTermination(10, TimeUnit.MILLISECONDS);
    stats = getCluster().getMasterMonitorInfo();
  }
  log.info(stats.bulkImports.toString());
  assertTrue(stats.bulkImports.size() > 0);

  // look for exception
  for (Future<Object> importResult : errs) {
    importResult.get();
  }
  es.awaitTermination(2, TimeUnit.MINUTES);
  assertTrue(es.isTerminated());
  log.info(String.format("Completed in %.2f seconds", (System.currentTimeMillis() - now) / 1000.0));
}
use of org.apache.accumulo.core.master.thrift.MasterMonitorInfo in project accumulo by apache.
the class MiniAccumuloClusterImplTest method saneMonitorInfo.
/**
 * Polls the mini cluster until the master reports more than two tables and exactly
 * {@code NUM_TSERVERS} tablet servers, then sanity-checks the returned monitor info.
 *
 * <p>
 * The wait is bounded only by the 60 second test timeout.
 */
@Test(timeout = 60000)
public void saneMonitorInfo() throws Exception {
  MasterMonitorInfo stats;
  while (true) {
    stats = accumulo.getMasterMonitorInfo();
    // A missing table map is deliberately not retried: it falls through so that the
    // "should have a table map." assertion below reports it instead of a bare NPE here.
    final boolean stillWaitingForTables = stats.tableMap != null && stats.tableMap.size() <= 2;
    final boolean allTserversReported = stats.tServerInfo != null && stats.tServerInfo.size() == NUM_TSERVERS;
    if (!stillWaitingForTables && allTserversReported) {
      break;
    }
    // Pause between polls rather than spinning on the master
    Thread.sleep(100);
  }

  List<MasterState> validStates = Arrays.asList(MasterState.values());
  List<MasterGoalState> validGoals = Arrays.asList(MasterGoalState.values());
  Assert.assertTrue("master state should be valid.", validStates.contains(stats.state));
  Assert.assertTrue("master goal state should be in " + validGoals + ". is " + stats.goalState, validGoals.contains(stats.goalState));

  Assert.assertNotNull("should have a table map.", stats.tableMap);
  Assert.assertTrue("root table should exist in " + stats.tableMap.keySet(), stats.tableMap.containsKey(RootTable.ID.canonicalID()));
  Assert.assertTrue("meta table should exist in " + stats.tableMap.keySet(), stats.tableMap.containsKey(MetadataTable.ID.canonicalID()));
  Assert.assertTrue("our test table should exist in " + stats.tableMap.keySet(), stats.tableMap.containsKey(testTableID));

  Assert.assertNotNull("there should be tservers.", stats.tServerInfo);
  Assert.assertEquals(NUM_TSERVERS, stats.tServerInfo.size());
}
use of org.apache.accumulo.core.master.thrift.MasterMonitorInfo in project accumulo by apache.
the class Master method getMasterMonitorInfo.
/**
 * Assembles a new {@link MasterMonitorInfo} from the master's current bookkeeping.
 *
 * <p>
 * The result carries the status of every tablet server in {@code tserverStatus}, per-table
 * totals accumulated over those servers, the bad servers (reported as UNRESPONSIVE), the master
 * state and goal state, the unassigned tablet count, the servers being shut down, the dead
 * tablet server list, and the bulk import status.
 *
 * @return a freshly built monitor info object
 */
public MasterMonitorInfo getMasterMonitorInfo() {
  final MasterMonitorInfo info = new MasterMonitorInfo();

  // Per-server status, plus per-table totals accumulated across all servers.
  // NOTE(review): presumably DefaultMap hands out a TableInfo for table ids not seen yet; confirm.
  info.tServerInfo = new ArrayList<>();
  info.tableMap = new DefaultMap<>(new TableInfo());
  for (Entry<TServerInstance, TabletServerStatus> perServer : tserverStatus.entrySet()) {
    final TabletServerStatus serverStatus = perServer.getValue();
    info.tServerInfo.add(serverStatus);
    for (Entry<String, TableInfo> perTable : serverStatus.tableMap.entrySet()) {
      final TableInfo runningTotal = info.tableMap.get(perTable.getKey());
      TableInfoUtil.add(runningTotal, perTable.getValue());
    }
  }

  // Every server currently tracked as bad is reported as UNRESPONSIVE
  info.badTServers = new HashMap<>();
  synchronized (badServers) {
    for (TServerInstance badServer : badServers.keySet()) {
      info.badTServers.put(badServer.hostPort(), TabletServerState.UNRESPONSIVE.getId());
    }
  }

  info.state = getMasterState();
  info.goalState = getMasterGoalState();
  info.unassignedTablets = displayUnassigned();

  info.serversShuttingDown = new HashSet<>();
  synchronized (serversToShutdown) {
    for (TServerInstance stopping : serversToShutdown) {
      info.serversShuttingDown.add(stopping.hostPort());
    }
  }

  final DeadServerList deadServers = new DeadServerList(ZooUtil.getRoot(getInstance()) + Constants.ZDEADTSERVERS);
  info.deadTabletServers = deadServers.getList();
  info.bulkImports = bulkImportStatus.getBulkLoadStatus();
  return info;
}
Aggregations