use of org.apache.accumulo.core.tabletserver.thrift.TabletStats in project accumulo by apache.
the class DefaultLoadBalancer method move.
/**
 * Select tablets to migrate from {@code tooMuch} to {@code tooLittle} based on differences between
 * table loads; if the loads are even, use the busiest table.
 *
 * <p>
 * At most {@code count} migrations are proposed. The method stops early, returning whatever has been
 * collected so far, when the tablets of the chosen table cannot be obtained or when
 * {@code selectTablet} yields no tablet.
 *
 * @param tooMuch
 *          counts for the server tablets are taken from; its {@code count} is decremented for every
 *          proposed migration
 * @param tooLittle
 *          counts for the server tablets are given to; its {@code count} is incremented for every
 *          proposed migration
 * @param count
 *          maximum number of migrations to propose
 * @param donerTabletStats
 *          per-table cache of the online tablets of the {@code tooMuch} server; it is filled in on
 *          demand and every tablet chosen for migration is removed from it
 * @return the proposed migrations, possibly empty
 */
List<TabletMigration> move(ServerCounts tooMuch, ServerCounts tooLittle, int count, Map<Table.ID, Map<KeyExtent, TabletStats>> donerTabletStats) {
  List<TabletMigration> result = new ArrayList<>();
  if (count == 0) {
    return result;
  }
  // Copy counts so we can update them as we propose migrations
  Map<Table.ID, Integer> tooMuchMap = tabletCountsPerTable(tooMuch.status);
  Map<Table.ID, Integer> tooLittleMap = tabletCountsPerTable(tooLittle.status);
  for (int i = 0; i < count; i++) {
    Table.ID table;
    if (tableToBalance == null) {
      // find a table to migrate: look for an uneven table count
      int biggestDifference = 0;
      Table.ID biggestDifferenceTable = null;
      for (Entry<Table.ID, Integer> tableEntry : tooMuchMap.entrySet()) {
        Table.ID tableID = tableEntry.getKey();
        // a table with no entry on the receiving side counts as zero tablets there
        int diff = tableEntry.getValue() - tooLittleMap.getOrDefault(tableID, 0);
        if (diff > biggestDifference) {
          biggestDifference = diff;
          biggestDifferenceTable = tableID;
        }
      }
      if (biggestDifference < 2) {
        // the per-table counts are (nearly) even, fall back to the busiest table
        table = busiest(tooMuch.status.tableMap);
      } else {
        table = biggestDifferenceTable;
      }
    } else {
      // just balance the given table
      table = tableToBalance;
    }
    Map<KeyExtent, TabletStats> onlineTabletsForTable = donerTabletStats.get(table);
    try {
      if (onlineTabletsForTable == null) {
        onlineTabletsForTable = new HashMap<>();
        List<TabletStats> stats = getOnlineTabletsForTable(tooMuch.server, table);
        if (null == stats) {
          log.warn("Unable to find tablets to move");
          return result;
        }
        for (TabletStats stat : stats) {
          onlineTabletsForTable.put(new KeyExtent(stat.extent), stat);
        }
        donerTabletStats.put(table, onlineTabletsForTable);
      }
    } catch (Exception ex) {
      log.error("Unable to select a tablet to move", ex);
      return result;
    }
    KeyExtent extent = selectTablet(tooMuch.server, onlineTabletsForTable);
    // Check for "nothing selected" before touching the cache: removing a null key is an error on
    // maps that do not permit null keys.
    if (extent == null) {
      return result;
    }
    onlineTabletsForTable.remove(extent);
    // merge() tolerates a table that has no entry yet. In particular, if a table grows from 1
    // tablet then tooLittleMap may hold nothing for it, since a single tablet server holds all of
    // its tablets; the count then starts from this first migration.
    tooMuchMap.merge(table, -1, Integer::sum);
    tooLittleMap.merge(table, 1, Integer::sum);
    tooMuch.count--;
    tooLittle.count++;
    result.add(new TabletMigration(extent, tooMuch.server, tooLittle.server));
  }
  return result;
}
use of org.apache.accumulo.core.tabletserver.thrift.TabletStats in project accumulo by apache.
the class TabletServerResource method doCurrentOperations.
/**
 * Builds one {@code CurrentOperations} row per tablet in {@code tsStats}.
 *
 * <p>
 * As a side effect the {@code total} field accumulates the entry count and the minor/major
 * compaction stats of every tablet that has an extent. An element without an extent is not turned
 * into a row; it is stored in the {@code historical} field instead.
 *
 * <p>
 * The tablet is displayed as the Base64-encoded MD5 digest of its end row (the digest of no input
 * when the end row is null or empty), wrapped in square brackets.
 *
 * @param tsStats
 *          tablet stats to convert
 * @return the rows, in the iteration order of {@code tsStats}
 * @throws Exception
 *           if the MD5 digest cannot be obtained or a called helper fails
 */
private List<CurrentOperations> doCurrentOperations(List<TabletStats> tsStats) throws Exception {
  List<CurrentOperations> rows = new ArrayList<>();

  for (TabletStats stats : tsStats) {
    // An entry without an extent is kept aside as the historical stats, not listed as a tablet.
    if (stats.extent == null) {
      historical = stats;
      continue;
    }

    // Fold this tablet into the running totals.
    total.numEntries += stats.numEntries;
    ActionStatsUpdator.update(total.minors, stats.minors);
    ActionStatsUpdator.update(total.majors, stats.majors);

    KeyExtent extent = new KeyExtent(stats.extent);
    Table.ID tableId = extent.getTableId();

    // Obscure the end row: only its digest is shown.
    MessageDigest md5 = MessageDigest.getInstance("MD5");
    boolean hasEndRow = extent.getEndRow() != null && extent.getEndRow().getLength() > 0;
    if (hasEndRow) {
      md5.update(extent.getEndRow().getBytes(), 0, extent.getEndRow().getLength());
    }
    String displayExtent = "[" + Base64.getEncoder().encodeToString(md5.digest()) + "]";

    String tableName = Tables.getPrintableTableInfoFromId(HdfsZooInstance.getInstance(), tableId);

    // Averages and rates are passed as null when their divisor is zero.
    rows.add(new CurrentOperations(tableName, tableId, displayExtent,
        stats.numEntries, stats.ingestRate, stats.queryRate,
        stats.minors.num == 0 ? null : stats.minors.elapsed / stats.minors.num,
        stddev(stats.minors.elapsed, stats.minors.num, stats.minors.sumDev),
        stats.minors.elapsed == 0 ? null : stats.minors.count / stats.minors.elapsed,
        stats.majors.num == 0 ? null : stats.majors.elapsed / stats.majors.num,
        stddev(stats.majors.elapsed, stats.majors.num, stats.majors.sumDev),
        stats.majors.elapsed == 0 ? null : stats.majors.count / stats.majors.elapsed));
  }

  return rows;
}
use of org.apache.accumulo.core.tabletserver.thrift.TabletStats in project accumulo by apache.
the class HostRegexTableLoadBalancer method balance.
/**
 * Runs one balancing pass over all tables, restricting each table to the tablet servers of its
 * pool.
 *
 * <p>
 * The pass has up to three stages:
 * <ol>
 * <li>When more than {@code oobCheckMillis} have passed since {@code lastOOBCheck}, each server
 * whose pools do not include a table's pool is asked for its online tablets of that table. Every
 * such tablet that is not already being migrated is scheduled to move to a randomly chosen server
 * of the table's pool. If this produces any migration the method returns without balancing.</li>
 * <li>When migrations are outstanding: at {@code maxOutstandingMigrations} or more the method
 * returns without balancing; otherwise the migrations remembered from the last pass are applied to
 * the table info obtained through {@code getTableInfo}.</li>
 * <li>Each table is handed to its own balancer together with the servers of its pool, and the
 * resulting migrations are collected until {@code maxTServerMigrations} is reached.</li>
 * </ol>
 *
 * @param current
 *          status of the tablet servers, keyed by server
 * @param migrations
 *          tablets with a migration already outstanding; may be null
 * @param migrationsOut
 *          receives the migrations proposed by this pass
 * @return always {@code 20 * 1000}; presumably the delay in milliseconds before the next pass —
 *         confirm against the caller
 */
@Override
public long balance(SortedMap<TServerInstance, TabletServerStatus> current, Set<KeyExtent> migrations, List<TabletMigration> migrationsOut) {
  long minBalanceTime = 20 * 1000;
  // Iterate over the tables and balance each of them
  TableOperations t = getTableOperations();
  if (t == null) {
    return minBalanceTime;
  }
  Map<String, String> tableIdMap = t.tableIdMap();
  long now = System.currentTimeMillis();
  Map<String, SortedMap<TServerInstance, TabletServerStatus>> currentGrouped = splitCurrentByRegex(current);
  if ((now - this.lastOOBCheck) > this.oobCheckMillis) {
    try {
      // One generator is enough for the whole check.
      Random random = new Random();
      // Check to see if a tablet is assigned outside the bounds of the pool. If so, migrate it.
      for (String table : t.list()) {
        LOG.debug("Checking for out of bounds tablets for table {}", table);
        String tablePoolName = getPoolNameForTable(table);
        for (Entry<TServerInstance, TabletServerStatus> e : current.entrySet()) {
          // pool names are the same as table names, except in the DEFAULT case.
          // If this table is assigned to a pool for this host, then move on.
          List<String> hostPools = getPoolNamesForHost(e.getKey().host());
          if (hostPools.contains(tablePoolName)) {
            continue;
          }
          String tid = tableIdMap.get(table);
          if (null == tid) {
            LOG.warn("Unable to check for out of bounds tablets for table {}, it may have been deleted or renamed.", table);
            continue;
          }
          try {
            List<TabletStats> outOfBoundsTablets = getOnlineTabletsForTable(e.getKey(), Table.ID.of(tid));
            if (null == outOfBoundsTablets) {
              continue;
            }
            for (TabletStats ts : outOfBoundsTablets) {
              KeyExtent ke = new KeyExtent(ts.getExtent());
              // migrations is treated as nullable further down, so guard it here as well
              if (migrations != null && migrations.contains(ke)) {
                LOG.debug("Migration for out of bounds tablet {} has already been requested", ke);
                continue;
              }
              String poolName = getPoolNameForTable(table);
              SortedMap<TServerInstance, TabletServerStatus> currentView = currentGrouped.get(poolName);
              // An empty view has no server to pick; Random.nextInt(0) would throw.
              if (null != currentView && !currentView.isEmpty()) {
                // Pick a random server of the pool by skipping a random number of keys.
                int skip = random.nextInt(currentView.size());
                Iterator<TServerInstance> iter = currentView.keySet().iterator();
                for (int i = 0; i < skip; i++) {
                  iter.next();
                }
                TServerInstance nextTS = iter.next();
                LOG.info("Tablet {} is currently outside the bounds of the regex, migrating from {} to {}", ke, e.getKey(), nextTS);
                migrationsOut.add(new TabletMigration(ke, e.getKey(), nextTS));
                if (migrationsOut.size() >= this.maxTServerMigrations) {
                  break;
                }
              } else {
                LOG.warn("No tablet servers online for pool {}, unable to migrate out of bounds tablets", poolName);
              }
            }
          } catch (TException e1) {
            // Pass the exception itself as the trailing argument so its stack trace is logged.
            LOG.error("Error in OOB check getting tablets for table {} from server {}", tid, e.getKey().host(), e1);
          }
        }
      }
    } finally {
      // this could have taken a while...get a new time
      this.lastOOBCheck = System.currentTimeMillis();
    }
  }
  if (!migrationsOut.isEmpty()) {
    LOG.warn("Not balancing tables due to moving {} out of bounds tablets", migrationsOut.size());
    LOG.info("Migrating out of bounds tablets: {}", migrationsOut);
    return minBalanceTime;
  }
  if (migrations != null && !migrations.isEmpty()) {
    if (migrations.size() >= maxOutstandingMigrations) {
      LOG.warn("Not balancing tables due to {} outstanding migrations", migrations.size());
      if (LOG.isTraceEnabled()) {
        LOG.trace("Sample up to 10 outstanding migrations: {}", Iterables.limit(migrations, 10));
      }
      return minBalanceTime;
    }
    LOG.debug("Current outstanding migrations of {} being applied", migrations.size());
    if (LOG.isTraceEnabled()) {
      LOG.trace("Sample up to 10 outstanding migrations: {}", Iterables.limit(migrations, 10));
    }
    // Forget remembered migrations that are no longer outstanding.
    migrationsFromLastPass.keySet().retainAll(migrations);
    // NOTE(review): currentCopy is only handed to getTableInfo and is not read again in this
    // method; the per-table balancers below receive views derived from `current` — confirm that
    // this is intended.
    SortedMap<TServerInstance, TabletServerStatus> currentCopy = new TreeMap<>(current);
    Multimap<TServerInstance, String> serverTableIdCopied = HashMultimap.create();
    for (TabletMigration migration : migrationsFromLastPass.values()) {
      // Account for the pending move: one tablet fewer on the old server, one more on the new.
      TableInfo fromInfo = getTableInfo(currentCopy, serverTableIdCopied, migration.tablet.getTableId().toString(), migration.oldServer);
      if (fromInfo != null) {
        fromInfo.setOnlineTablets(fromInfo.getOnlineTablets() - 1);
      }
      TableInfo toInfo = getTableInfo(currentCopy, serverTableIdCopied, migration.tablet.getTableId().toString(), migration.newServer);
      if (toInfo != null) {
        toInfo.setOnlineTablets(toInfo.getOnlineTablets() + 1);
      }
    }
    // The per-table balancers are called with no outstanding migrations.
    migrations = EMPTY_MIGRATIONS;
  } else {
    migrationsFromLastPass.clear();
  }
  for (String s : tableIdMap.values()) {
    Table.ID tableId = Table.ID.of(s);
    String tableName = tableIdToTableName.get(tableId);
    String regexTableName = getPoolNameForTable(tableName);
    SortedMap<TServerInstance, TabletServerStatus> currentView = currentGrouped.get(regexTableName);
    if (null == currentView) {
      LOG.warn("Skipping balance for table {} as no tablet servers are online.", tableName);
      continue;
    }
    ArrayList<TabletMigration> newMigrations = new ArrayList<>();
    getBalancerForTable(tableId).balance(currentView, migrations, newMigrations);
    // Track since when a table has produced migrations on every pass, and warn after an hour.
    if (newMigrations.isEmpty()) {
      tableToTimeSinceNoMigrations.remove(s);
    } else if (tableToTimeSinceNoMigrations.containsKey(s)) {
      if ((now - tableToTimeSinceNoMigrations.get(s)) > ONE_HOUR) {
        LOG.warn("We have been consistently producing migrations for {}: {}", tableName, Iterables.limit(newMigrations, 10));
      }
    } else {
      tableToTimeSinceNoMigrations.put(s, now);
    }
    migrationsOut.addAll(newMigrations);
    if (migrationsOut.size() >= this.maxTServerMigrations) {
      break;
    }
  }
  // Remember what was proposed so the next pass can account for it.
  for (TabletMigration migration : migrationsOut) {
    migrationsFromLastPass.put(migration.tablet, migration);
  }
  LOG.info("Migrating tablets for balance: {}", migrationsOut);
  return minBalanceTime;
}
use of org.apache.accumulo.core.tabletserver.thrift.TabletStats in project accumulo by apache.
the class TabletServerResource method getTserverDetails.
/**
 * Generates details for the selected tserver
 *
 * <p>
 * Resets the {@code total} and {@code historical} fields, fetches the tablet stats of every table
 * plus the historical stats from the tserver, and derives the current and all-time averages and
 * standard deviations from them.
 *
 * @param tserverAddress
 *          TServer name
 * @return TServer details, or {@code null} when no known tserver has that name or when the stats
 *         could not be fetched from it
 * @throws Exception
 *           if building the current operations fails
 */
@Path("{address}")
@GET
public TabletServerSummary getTserverDetails(@PathParam("address") @NotNull @Pattern(regexp = SERVER_REGEX) String tserverAddress) throws Exception {
  boolean tserverExists = false;
  for (TabletServerStatus ts : Monitor.getMmi().getTServerInfo()) {
    if (tserverAddress.equals(ts.getName())) {
      tserverExists = true;
      break;
    }
  }
  if (!tserverExists) {
    return null;
  }

  double currentMinorAvg = 0;
  double currentMajorAvg = 0;
  double currentMinorStdDev = 0;
  double currentMajorStdDev = 0;

  total = new TabletStats(null, new ActionStats(), new ActionStats(), new ActionStats(), 0, 0, 0, 0);
  HostAndPort address = HostAndPort.fromString(tserverAddress);
  historical = new TabletStats(null, new ActionStats(), new ActionStats(), new ActionStats(), 0, 0, 0, 0);
  List<TabletStats> tsStats = new ArrayList<>();
  try {
    ClientContext context = Monitor.getContext();
    TabletClientService.Client client = ThriftUtil.getClient(new TabletClientService.Client.Factory(), address, context);
    try {
      for (String tableId : Monitor.getMmi().tableMap.keySet()) {
        tsStats.addAll(client.getTabletStats(Tracer.traceInfo(), context.rpcCreds(), tableId));
      }
      historical = client.getHistoricalStats(Tracer.traceInfo(), context.rpcCreds());
    } finally {
      ThriftUtil.returnClient(client);
    }
  } catch (Exception e) {
    // NOTE(review): the failure is dropped without a trace; consider logging it once a logger is
    // available in this class. The null return is kept because callers may rely on it.
    return null;
  }

  // Accumulates the per-tablet stats into `total` as a side effect.
  List<CurrentOperations> currentOps = doCurrentOperations(tsStats);

  // "Current" figures are taken before the historical stats are merged into the totals.
  // NOTE(review): only the minor average is truncated to a whole number — confirm that the
  // asymmetry with the major average is intended.
  if (total.minors.num != 0) {
    currentMinorAvg = (long) (total.minors.elapsed / total.minors.num);
  }
  if (total.minors.elapsed != 0 && total.minors.num != 0) {
    currentMinorStdDev = stddev(total.minors.elapsed, total.minors.num, total.minors.sumDev);
  }
  if (total.majors.num != 0) {
    currentMajorAvg = total.majors.elapsed / total.majors.num;
  }
  if (total.majors.elapsed != 0 && total.majors.num != 0 && total.majors.elapsed > total.majors.num) {
    currentMajorStdDev = stddev(total.majors.elapsed, total.majors.num, total.majors.sumDev);
  }

  // From here on `total` also includes the historical minor/major stats.
  ActionStatsUpdator.update(total.minors, historical.minors);
  ActionStatsUpdator.update(total.majors, historical.majors);

  double totalElapsedForAll = total.majors.elapsed + historical.splits.elapsed + total.minors.elapsed;
  double minorStdDev = stddev(total.minors.elapsed, total.minors.num, total.minors.sumDev);
  double minorQueueStdDev = stddev(total.minors.queueTime, total.minors.num, total.minors.queueSumDev);
  double majorStdDev = stddev(total.majors.elapsed, total.majors.num, total.majors.sumDev);
  double majorQueueStdDev = stddev(total.majors.queueTime, total.majors.num, total.majors.queueSumDev);
  // Same (elapsed, num, sumDev) argument order as every other stddev call in this method.
  double splitStdDev = stddev(historical.splits.elapsed, historical.splits.num, historical.splits.sumDev);

  TabletServerDetailInformation details = doDetails(address, tsStats.size());
  List<AllTimeTabletResults> allTime = doAllTimeResults(majorQueueStdDev, minorQueueStdDev, totalElapsedForAll, splitStdDev, majorStdDev, minorStdDev);
  CurrentTabletResults currentRes = doCurrentTabletResults(currentMinorAvg, currentMinorStdDev, currentMajorAvg, currentMajorStdDev);
  return new TabletServerSummary(details, allTime, currentRes, currentOps);
}
Aggregations