use of org.apache.accumulo.core.master.thrift.TabletServerStatus in project accumulo by apache.
the class TableInfoUtil method summarizeTableStats.
/**
 * Builds a per-table map from the tablet server statuses carried by the given monitor info.
 *
 * <p>
 * For every table name found in a server's {@code tableMap}, the map holds the largest
 * {@code holdTime} reported by any server that lists that table, never less than zero.
 *
 * @param mmi
 *          monitor info to summarize; may be {@code null}
 * @return a new mutable map keyed by table name; empty when {@code mmi} or its server list is
 *         {@code null}
 */
public static Map<String, Double> summarizeTableStats(MasterMonitorInfo mmi) {
  Map<String, Double> maxHoldTimeByTable = new HashMap<>();
  if (mmi == null || mmi.tServerInfo == null) {
    return maxHoldTimeByTable;
  }
  for (TabletServerStatus server : mmi.tServerInfo) {
    // Skip servers that did not report a table map.
    if (server == null || server.tableMap == null) {
      continue;
    }
    for (String tableName : server.tableMap.keySet()) {
      Double previous = maxHoldTimeByTable.get(tableName);
      double floor = (previous != null) ? previous : 0.;
      maxHoldTimeByTable.put(tableName, Math.max(floor, server.holdTime));
    }
  }
  return maxHoldTimeByTable;
}
use of org.apache.accumulo.core.master.thrift.TabletServerStatus in project accumulo by apache.
the class GetMasterStats method main.
/**
 * Fetches the current master statistics and prints them as an indented report through
 * {@code out}.
 *
 * <p>
 * The master is contacted in a loop: when the contacted service reports that it is not the active
 * one ({@link ThriftNotActiveServiceException}), the call is retried after a 100 ms pause. Any
 * other exception propagates to the caller.
 *
 * <p>
 * Collection-valued fields of the returned statistics are treated as optional: a {@code null}
 * collection is reported as empty instead of failing with a {@link NullPointerException}.
 *
 * @param args
 *          ignored
 * @throws Exception
 *           if the statistics cannot be fetched
 */
public static void main(String[] args) throws Exception {
  MasterClientService.Iface client = null;
  MasterMonitorInfo stats = null;
  Instance instance = HdfsZooInstance.getInstance();
  AccumuloServerContext context = new AccumuloServerContext(instance, new ServerConfigurationFactory(instance));
  while (true) {
    try {
      client = MasterClient.getConnectionWithRetry(context);
      stats = client.getMasterStats(Tracer.traceInfo(), context.rpcCreds());
      break;
    } catch (ThriftNotActiveServiceException e) {
      // Let it loop, fetching a new location
      sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
    } finally {
      if (client != null) {
        MasterClient.close(client);
        // Forget the closed client so that a failed reconnect on the next pass cannot close it a second time.
        client = null;
      }
    }
  }
  out(0, "State: " + stats.state.name());
  out(0, "Goal State: " + stats.goalState.name());
  if (stats.serversShuttingDown != null && stats.serversShuttingDown.size() > 0) {
    out(0, "Servers to shutdown");
    for (String server : stats.serversShuttingDown) {
      out(1, "%s", server);
    }
  }
  out(0, "Unassigned tablets: %d", stats.unassignedTablets);
  if (stats.badTServers != null && stats.badTServers.size() > 0) {
    out(0, "Bad servers");
    for (Entry<String, Byte> entry : stats.badTServers.entrySet()) {
      out(1, "%s: %d", entry.getKey(), (int) entry.getValue());
    }
  }
  // A null list is reported as a count of zero, consistent with the guarded collections above.
  int deadCount = stats.deadTabletServers == null ? 0 : stats.deadTabletServers.size();
  out(0, "Dead tablet servers count: %s", deadCount);
  if (deadCount > 0) {
    // One formatter is enough for the whole list; it is only used from this thread.
    SimpleDateFormat lastReportFormat = new SimpleDateFormat();
    for (DeadServer dead : stats.deadTabletServers) {
      out(1, "Dead tablet server: %s", dead.server);
      out(2, "Last report: %s", lastReportFormat.format(new Date(dead.lastStatus)));
      out(2, "Cause: %s", dead.status);
    }
  }
  int bulkImportCount = stats.bulkImports == null ? 0 : stats.bulkImports.size();
  out(0, "Bulk imports: %s", bulkImportCount);
  if (bulkImportCount > 0) {
    for (BulkImportStatus bulk : stats.bulkImports) {
      out(1, "Import directory: %s", bulk.filename);
      out(2, "Bulk state %s", bulk.state);
      out(2, "Bulk start %s", bulk.startTime);
    }
  }
  if (stats.tableMap != null && stats.tableMap.size() > 0) {
    out(0, "Tables");
    for (Entry<String, TableInfo> entry : stats.tableMap.entrySet()) {
      TableInfo v = entry.getValue();
      out(1, "%s", entry.getKey());
      out(2, "Records: %d", v.recs);
      out(2, "Records in Memory: %d", v.recsInMemory);
      out(2, "Tablets: %d", v.tablets);
      out(2, "Online Tablets: %d", v.onlineTablets);
      out(2, "Ingest Rate: %.2f", v.ingestRate);
      out(2, "Query Rate: %.2f", v.queryRate);
    }
  }
  if (stats.tServerInfo != null && stats.tServerInfo.size() > 0) {
    out(0, "Tablet Servers");
    long now = System.currentTimeMillis();
    for (TabletServerStatus server : stats.tServerInfo) {
      TableInfo summary = TableInfoUtil.summarizeTableStats(server);
      out(1, "Name: %s", server.name);
      out(2, "Ingest: %.2f", summary.ingestRate);
      out(2, "Last Contact: %s", server.lastContact);
      out(2, "OS Load Average: %.2f", server.osLoad);
      out(2, "Queries: %.2f", summary.queryRate);
      out(2, "Time Difference: %.1f", ((now - server.lastContact) / 1000.));
      out(2, "Total Records: %d", summary.recs);
      out(2, "Lookups: %d", server.lookups);
      if (server.holdTime > 0)
        out(2, "Hold Time: %d", server.holdTime);
      if (server.tableMap != null && server.tableMap.size() > 0) {
        out(2, "Tables");
        for (Entry<String, TableInfo> status : server.tableMap.entrySet()) {
          TableInfo info = status.getValue();
          out(3, "Table: %s", status.getKey());
          out(4, "Tablets: %d", info.onlineTablets);
          out(4, "Records: %d", info.recs);
          out(4, "Records in Memory: %d", info.recsInMemory);
          out(4, "Ingest: %.2f", info.ingestRate);
          out(4, "Queries: %.2f", info.queryRate);
          out(4, "Major Compacting: %d", info.majors == null ? 0 : info.majors.running);
          out(4, "Queued for Major Compaction: %d", info.majors == null ? 0 : info.majors.queued);
          out(4, "Minor Compacting: %d", info.minors == null ? 0 : info.minors.running);
          out(4, "Queued for Minor Compaction: %d", info.minors == null ? 0 : info.minors.queued);
        }
      }
      int recoveryCount = server.logSorts == null ? 0 : server.logSorts.size();
      out(2, "Recoveries: %d", recoveryCount);
      if (recoveryCount > 0) {
        for (RecoveryStatus sort : server.logSorts) {
          out(3, "File: %s", sort.name);
          out(3, "Progress: %.2f%%", sort.progress * 100);
          out(3, "Time running: %s", sort.runtime / 1000.);
        }
      }
      // NOTE(review): this per-server section prints the master-wide stats.bulkImports list, so every
      // server shows the same entries. If the server status carries its own bulk import list, that is
      // presumably what was intended here - confirm before changing.
      out(3, "Bulk imports: %s", bulkImportCount);
      if (bulkImportCount > 0) {
        for (BulkImportStatus bulk : stats.bulkImports) {
          out(4, "Import file: %s", bulk.filename);
          out(5, "Bulk state %s", bulk.state);
          out(5, "Bulk start %s", bulk.startTime);
        }
      }
    }
  }
}
use of org.apache.accumulo.core.master.thrift.TabletServerStatus in project accumulo by apache.
the class HostRegexTableLoadBalancer method balance.
/**
 * Balances tablets per table, restricting each table to the tablet servers of its host-regex
 * pool.
 *
 * <p>
 * The method works in three stages:
 * <ol>
 * <li>When more than {@code oobCheckMillis} has passed since {@code lastOOBCheck}, every table is
 * checked for tablets hosted on a server whose pools do not include the table's pool. Each such
 * tablet that is not already in {@code migrations} is queued for migration to a randomly chosen
 * server of the table's pool. If anything was queued, the method returns without balancing.</li>
 * <li>When {@code migrations} holds at least {@code maxOutstandingMigrations} entries, the method
 * returns without balancing.</li>
 * <li>Otherwise each table's own balancer is invoked with the servers of that table's pool, and
 * its proposed migrations are appended to {@code migrationsOut} until
 * {@code maxTServerMigrations} is reached.</li>
 * </ol>
 *
 * @param current
 *          status of the currently known tablet servers
 * @param migrations
 *          migrations that are still outstanding; may be {@code null}
 * @param migrationsOut
 *          receives the migrations proposed by this call
 * @return always {@code 20 * 1000} (presumably milliseconds until the next balance attempt)
 */
@Override
public long balance(SortedMap<TServerInstance, TabletServerStatus> current, Set<KeyExtent> migrations, List<TabletMigration> migrationsOut) {
  long minBalanceTime = 20 * 1000;
  // Iterate over the tables and balance each of them
  TableOperations t = getTableOperations();
  if (t == null)
    return minBalanceTime;
  Map<String, String> tableIdMap = t.tableIdMap();
  long now = System.currentTimeMillis();
  Map<String, SortedMap<TServerInstance, TabletServerStatus>> currentGrouped = splitCurrentByRegex(current);
  if ((now - this.lastOOBCheck) > this.oobCheckMillis) {
    try {
      // One generator serves the whole out-of-bounds pass.
      Random random = new Random();
      // Check to see if a tablet is assigned outside the bounds of the pool. If so, migrate it.
      for (String table : t.list()) {
        LOG.debug("Checking for out of bounds tablets for table {}", table);
        String tablePoolName = getPoolNameForTable(table);
        for (Entry<TServerInstance, TabletServerStatus> e : current.entrySet()) {
          // pool names are the same as table names, except in the DEFAULT case.
          // If this table is assigned to a pool for this host, then move on.
          List<String> hostPools = getPoolNamesForHost(e.getKey().host());
          if (hostPools.contains(tablePoolName)) {
            continue;
          }
          String tid = tableIdMap.get(table);
          if (null == tid) {
            LOG.warn("Unable to check for out of bounds tablets for table {}, it may have been deleted or renamed.", table);
            continue;
          }
          try {
            List<TabletStats> outOfBoundsTablets = getOnlineTabletsForTable(e.getKey(), Table.ID.of(tid));
            if (null == outOfBoundsTablets) {
              continue;
            }
            for (TabletStats ts : outOfBoundsTablets) {
              KeyExtent ke = new KeyExtent(ts.getExtent());
              // migrations is treated as nullable further down, so guard it here as well
              if (migrations != null && migrations.contains(ke)) {
                LOG.debug("Migration for out of bounds tablet {} has already been requested", ke);
                continue;
              }
              SortedMap<TServerInstance, TabletServerStatus> currentView = currentGrouped.get(tablePoolName);
              if (null != currentView) {
                // Pick a uniformly random server of the pool by skipping that many keys.
                int skip = random.nextInt(currentView.size());
                Iterator<TServerInstance> iter = currentView.keySet().iterator();
                for (int i = 0; i < skip; i++) {
                  iter.next();
                }
                TServerInstance nextTS = iter.next();
                LOG.info("Tablet {} is currently outside the bounds of the regex, migrating from {} to {}", ke, e.getKey(), nextTS);
                migrationsOut.add(new TabletMigration(ke, e.getKey(), nextTS));
                if (migrationsOut.size() >= this.maxTServerMigrations) {
                  // NOTE(review): this only leaves the loop over this server's tablets; the loops over
                  // servers and tables keep going, so the limit can be exceeded - confirm intent.
                  break;
                }
              } else {
                LOG.warn("No tablet servers online for pool {}, unable to migrate out of bounds tablets", tablePoolName);
              }
            }
          } catch (TException e1) {
            // Pass the exception itself (not the map entry) as the last argument so it is logged.
            LOG.error("Error in OOB check getting tablets for table {} from server {}", tid, e.getKey().host(), e1);
          }
        }
      }
    } finally {
      // this could have taken a while...get a new time
      this.lastOOBCheck = System.currentTimeMillis();
    }
  }
  if (migrationsOut.size() > 0) {
    LOG.warn("Not balancing tables due to moving {} out of bounds tablets", migrationsOut.size());
    LOG.info("Migrating out of bounds tablets: {}", migrationsOut);
    return minBalanceTime;
  }
  if (migrations != null && migrations.size() > 0) {
    if (migrations.size() >= maxOutstandingMigrations) {
      LOG.warn("Not balancing tables due to {} outstanding migrations", migrations.size());
      if (LOG.isTraceEnabled()) {
        LOG.trace("Sample up to 10 outstanding migrations: {}", Iterables.limit(migrations, 10));
      }
      return minBalanceTime;
    }
    LOG.debug("Current outstanding migrations of {} being applied", migrations.size());
    if (LOG.isTraceEnabled()) {
      LOG.trace("Sample up to 10 outstanding migrations: {}", Iterables.limit(migrations, 10));
    }
    // Keep only those migrations from the previous pass that are still outstanding.
    migrationsFromLastPass.keySet().retainAll(migrations);
    // NOTE(review): currentCopy is adjusted below but not read again in this method; the per-table
    // balancers receive views taken from currentGrouped - confirm the adjusted counts reach them.
    SortedMap<TServerInstance, TabletServerStatus> currentCopy = new TreeMap<>(current);
    Multimap<TServerInstance, String> serverTableIdCopied = HashMultimap.create();
    for (TabletMigration migration : migrationsFromLastPass.values()) {
      TableInfo fromInfo = getTableInfo(currentCopy, serverTableIdCopied, migration.tablet.getTableId().toString(), migration.oldServer);
      if (fromInfo != null) {
        fromInfo.setOnlineTablets(fromInfo.getOnlineTablets() - 1);
      }
      TableInfo toInfo = getTableInfo(currentCopy, serverTableIdCopied, migration.tablet.getTableId().toString(), migration.newServer);
      if (toInfo != null) {
        toInfo.setOnlineTablets(toInfo.getOnlineTablets() + 1);
      }
    }
    migrations = EMPTY_MIGRATIONS;
  } else {
    migrationsFromLastPass.clear();
  }
  for (String s : tableIdMap.values()) {
    Table.ID tableId = Table.ID.of(s);
    String tableName = tableIdToTableName.get(tableId);
    String regexTableName = getPoolNameForTable(tableName);
    SortedMap<TServerInstance, TabletServerStatus> currentView = currentGrouped.get(regexTableName);
    if (null == currentView) {
      LOG.warn("Skipping balance for table {} as no tablet servers are online.", tableName);
      continue;
    }
    ArrayList<TabletMigration> newMigrations = new ArrayList<>();
    getBalancerForTable(tableId).balance(currentView, migrations, newMigrations);
    // Track since when a table has been producing migrations on every pass, and warn after an hour.
    if (newMigrations.isEmpty()) {
      tableToTimeSinceNoMigrations.remove(s);
    } else if (tableToTimeSinceNoMigrations.containsKey(s)) {
      if ((now - tableToTimeSinceNoMigrations.get(s)) > ONE_HOUR) {
        LOG.warn("We have been consistently producing migrations for {}: {}", tableName, Iterables.limit(newMigrations, 10));
      }
    } else {
      tableToTimeSinceNoMigrations.put(s, now);
    }
    migrationsOut.addAll(newMigrations);
    if (migrationsOut.size() >= this.maxTServerMigrations) {
      break;
    }
  }
  // Remember what was proposed so the next pass can account for migrations still in flight.
  for (TabletMigration migration : migrationsOut) {
    migrationsFromLastPass.put(migration.tablet, migration);
  }
  LOG.info("Migrating tablets for balance: {}", migrationsOut);
  return minBalanceTime;
}
use of org.apache.accumulo.core.master.thrift.TabletServerStatus in project accumulo by apache.
the class Master method gatherTableInformation.
/**
 * Asks every given tablet server for its status in parallel and returns the answers received.
 *
 * <p>
 * Requests run on a fixed thread pool sized by {@code Property.MASTER_STATUS_THREAD_POOL_SIZE}
 * (at least one thread). The method waits up to twice {@code Property.TSERV_CLIENT_TIMEOUT} for
 * the requests to finish. A server whose request fails has its counter in {@code badServers}
 * incremented; once the counter exceeds {@code MAX_BAD_STATUS_COUNT} the server is asked to halt
 * and its counter is removed.
 *
 * <p>
 * Worker threads may still be running when the wait ends, so all access to the shared result map
 * is synchronized and the caller receives a snapshot that later worker writes do not touch.
 *
 * @param currentServers
 *          the tablet servers to query
 * @return a sorted map from each server that answered in time to its reported status
 */
private SortedMap<TServerInstance, TabletServerStatus> gatherTableInformation(Set<TServerInstance> currentServers) {
  long start = System.currentTimeMillis();
  int threads = Math.max(getConfiguration().getCount(Property.MASTER_STATUS_THREAD_POOL_SIZE), 1);
  ExecutorService tp = Executors.newFixedThreadPool(threads);
  // Written by the pool threads; TreeMap is not thread-safe, so every access locks on the map itself.
  final SortedMap<TServerInstance, TabletServerStatus> result = new TreeMap<>();
  for (TServerInstance serverInstance : currentServers) {
    final TServerInstance server = serverInstance;
    tp.submit(new Runnable() {
      @Override
      public void run() {
        try {
          Thread t = Thread.currentThread();
          String oldName = t.getName();
          try {
            // Rename the pool thread while it works so thread dumps show which server it is waiting on.
            t.setName("Getting status from " + server);
            TServerConnection connection = tserverSet.getConnection(server);
            if (connection == null)
              throw new IOException("No connection to " + server);
            TabletServerStatus status = connection.getTableMap(false);
            synchronized (result) {
              result.put(server, status);
            }
          } finally {
            t.setName(oldName);
          }
        } catch (Exception ex) {
          log.error("unable to get tablet server status {} {}", server, ex.toString());
          log.debug("unable to get tablet server status {}", server, ex);
          if (badServers.get(server).incrementAndGet() > MAX_BAD_STATUS_COUNT) {
            log.warn("attempting to stop {}", server);
            try {
              TServerConnection connection = tserverSet.getConnection(server);
              if (connection != null) {
                connection.halt(masterLock);
              }
            } catch (TTransportException e) {
              // ignore: it's probably down
            } catch (Exception e) {
              log.info("error talking to troublesome tablet server", e);
            }
            badServers.remove(server);
          }
        }
      }
    });
  }
  tp.shutdown();
  try {
    tp.awaitTermination(getConfiguration().getTimeInMillis(Property.TSERV_CLIENT_TIMEOUT) * 2, TimeUnit.MILLISECONDS);
  } catch (InterruptedException e) {
    // The interrupt is only logged; whatever has been gathered so far is still returned.
    log.debug("Interrupted while fetching status");
  }
  // Tasks can outlive the wait above, so work from a stable copy from here on.
  final SortedMap<TServerInstance, TabletServerStatus> snapshot;
  synchronized (result) {
    snapshot = new TreeMap<>(result);
  }
  synchronized (badServers) {
    // Drop counters for servers that are gone or that answered successfully this round.
    badServers.keySet().retainAll(currentServers);
    badServers.keySet().removeAll(snapshot.keySet());
  }
  log.debug(String.format("Finished gathering information from %d servers in %.2f seconds", snapshot.size(), (System.currentTimeMillis() - start) / 1000.));
  return snapshot;
}
use of org.apache.accumulo.core.master.thrift.TabletServerStatus in project accumulo by apache.
the class HostRegexTableLoadBalancerTest method testSplitCurrentByRegexUsingOverlappingPools.
/**
 * Verifies that {@code splitCurrentByRegex} places a host in every pool whose regex matches it
 * when the pool regexes overlap: FOO uses {@code r.*} and BAR uses {@code r01.*|r02.*}. With 15
 * servers, FOO is expected to contain all of them and BAR the first ten.
 */
@Test
public void testSplitCurrentByRegexUsingOverlappingPools() {
  init(new AccumuloServerContext(instance, new TestServerConfigurationFactory(instance) {
    @Override
    public TableConfiguration getTableConfiguration(Table.ID tableId) {
      NamespaceConfiguration namespaceConf = new NamespaceConfiguration(Namespace.ID.DEFAULT, this.instance, DefaultConfiguration.getInstance());
      return new TableConfiguration(instance, tableId, namespaceConf) {
        // Pool regexes served by this stub configuration, keyed by full property name.
        HashMap<String, String> poolRegexes = new HashMap<>();
        {
          poolRegexes.put(HostRegexTableLoadBalancer.HOST_BALANCER_PREFIX + FOO.getTableName(), "r.*");
          poolRegexes.put(HostRegexTableLoadBalancer.HOST_BALANCER_PREFIX + BAR.getTableName(), "r01.*|r02.*");
        }

        @Override
        public String get(Property property) {
          return poolRegexes.get(property.name());
        }

        @Override
        public void getProperties(Map<String, String> props, Predicate<String> filter) {
          poolRegexes.forEach((name, regex) -> {
            if (filter.test(name)) {
              props.put(name, regex);
            }
          });
        }

        @Override
        public long getUpdateCount() {
          return 0;
        }
      };
    }
  }));

  Map<String, SortedMap<TServerInstance, TabletServerStatus>> groups = this.splitCurrentByRegex(createCurrent(15));
  Assert.assertEquals(2, groups.size());

  // FOO's regex matches every host, so all 15 servers are expected in its pool.
  Assert.assertTrue(groups.containsKey(FOO.getTableName()));
  SortedMap<TServerInstance, TabletServerStatus> fooHosts = groups.get(FOO.getTableName());
  Assert.assertEquals(15, fooHosts.size());
  for (int host = 1; host <= 15; host++) {
    Assert.assertTrue(fooHosts.containsKey(new TServerInstance("192.168.0." + host + ":9997", 1)));
  }

  // BAR's regex overlaps FOO's; its pool is expected to hold servers 1 through 10.
  Assert.assertTrue(groups.containsKey(BAR.getTableName()));
  SortedMap<TServerInstance, TabletServerStatus> barHosts = groups.get(BAR.getTableName());
  Assert.assertEquals(10, barHosts.size());
  for (int host = 1; host <= 10; host++) {
    Assert.assertTrue(barHosts.containsKey(new TServerInstance("192.168.0." + host + ":9997", 1)));
  }
}
Aggregations