use of org.apache.accumulo.server.master.state.TServerInstance in project accumulo by apache.
the class FindOfflineTablets method findOffline.
static int findOffline(ClientContext context, String tableName) throws AccumuloException, TableNotFoundException {
final AtomicBoolean scanning = new AtomicBoolean(false);
LiveTServerSet tservers = new LiveTServerSet(context, new Listener() {
@Override
public void update(LiveTServerSet current, Set<TServerInstance> deleted, Set<TServerInstance> added) {
if (!deleted.isEmpty() && scanning.get())
log.warn("Tablet servers deleted while scanning: {}", deleted);
if (!added.isEmpty() && scanning.get())
log.warn("Tablet servers added while scanning: {}", added);
}
});
tservers.startListeningForTabletServerChanges();
scanning.set(true);
Iterator<TabletLocationState> zooScanner;
try {
zooScanner = new ZooTabletStateStore().iterator();
} catch (DistributedStoreException e) {
throw new AccumuloException(e);
}
int offline = 0;
System.out.println("Scanning zookeeper");
if ((offline = checkTablets(zooScanner, tservers)) > 0)
return offline;
if (RootTable.NAME.equals(tableName))
return 0;
System.out.println("Scanning " + RootTable.NAME);
Iterator<TabletLocationState> rootScanner = new MetaDataTableScanner(context, MetadataSchema.TabletsSection.getRange(), RootTable.NAME);
if ((offline = checkTablets(rootScanner, tservers)) > 0)
return offline;
if (MetadataTable.NAME.equals(tableName))
return 0;
System.out.println("Scanning " + MetadataTable.NAME);
Range range = MetadataSchema.TabletsSection.getRange();
if (tableName != null) {
Table.ID tableId = Tables.getTableId(context.getInstance(), tableName);
range = new KeyExtent(tableId, null, null).toMetadataRange();
}
try (MetaDataTableScanner metaScanner = new MetaDataTableScanner(context, range, MetadataTable.NAME)) {
return checkTablets(metaScanner, tservers);
}
}
use of org.apache.accumulo.server.master.state.TServerInstance in project accumulo by apache.
the class MasterRepairsDualAssignmentIT method test.
@Test
public void test() throws Exception {
// make some tablets, spread 'em around
Connector c = getConnector();
ClientContext context = new ClientContext(c.getInstance(), new Credentials("root", new PasswordToken(ROOT_PASSWORD)), getClientConfig());
String table = this.getUniqueNames(1)[0];
c.securityOperations().grantTablePermission("root", MetadataTable.NAME, TablePermission.WRITE);
c.securityOperations().grantTablePermission("root", RootTable.NAME, TablePermission.WRITE);
c.tableOperations().create(table);
SortedSet<Text> partitions = new TreeSet<>();
for (String part : "a b c d e f g h i j k l m n o p q r s t u v w x y z".split(" ")) {
partitions.add(new Text(part));
}
c.tableOperations().addSplits(table, partitions);
// scan the metadata table and get the two table location states
Set<TServerInstance> states = new HashSet<>();
Set<TabletLocationState> oldLocations = new HashSet<>();
MetaDataStateStore store = new MetaDataStateStore(context, null);
while (states.size() < 2) {
UtilWaitThread.sleep(250);
oldLocations.clear();
for (TabletLocationState tls : store) {
if (tls.current != null) {
states.add(tls.current);
oldLocations.add(tls);
}
}
}
assertEquals(2, states.size());
// Kill a tablet server... we don't care which one... wait for everything to be reassigned
cluster.killProcess(ServerType.TABLET_SERVER, cluster.getProcesses().get(ServerType.TABLET_SERVER).iterator().next());
Set<TServerInstance> replStates = new HashSet<>();
// Find out which tablet server remains
while (true) {
UtilWaitThread.sleep(1000);
states.clear();
replStates.clear();
boolean allAssigned = true;
for (TabletLocationState tls : store) {
if (tls != null && tls.current != null) {
states.add(tls.current);
} else if (tls != null && tls.extent.equals(new KeyExtent(ReplicationTable.ID, null, null))) {
replStates.add(tls.current);
} else {
allAssigned = false;
}
}
System.out.println(states + " size " + states.size() + " allAssigned " + allAssigned);
if (states.size() != 2 && allAssigned)
break;
}
assertEquals(1, replStates.size());
assertEquals(1, states.size());
// pick an assigned tablet and assign it to the old tablet
TabletLocationState moved = null;
for (TabletLocationState old : oldLocations) {
if (!states.contains(old.current)) {
moved = old;
}
}
assertNotEquals(null, moved);
// throw a mutation in as if we were the dying tablet
BatchWriter bw = c.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig());
Mutation assignment = new Mutation(moved.extent.getMetadataEntry());
moved.current.putLocation(assignment);
bw.addMutation(assignment);
bw.close();
// wait for the master to fix the problem
waitForCleanStore(store);
// now jam up the metadata table
bw = c.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig());
assignment = new Mutation(new KeyExtent(MetadataTable.ID, null, null).getMetadataEntry());
moved.current.putLocation(assignment);
bw.addMutation(assignment);
bw.close();
waitForCleanStore(new RootTabletStateStore(context, null));
}
use of org.apache.accumulo.server.master.state.TServerInstance in project accumulo by apache.
the class HostRegexTableLoadBalancer method balance.
@Override
public long balance(SortedMap<TServerInstance, TabletServerStatus> current, Set<KeyExtent> migrations, List<TabletMigration> migrationsOut) {
long minBalanceTime = 20 * 1000;
// Iterate over the tables and balance each of them
TableOperations t = getTableOperations();
if (t == null)
return minBalanceTime;
Map<String, String> tableIdMap = t.tableIdMap();
long now = System.currentTimeMillis();
Map<String, SortedMap<TServerInstance, TabletServerStatus>> currentGrouped = splitCurrentByRegex(current);
if ((now - this.lastOOBCheck) > this.oobCheckMillis) {
try {
// Check to see if a tablet is assigned outside the bounds of the pool. If so, migrate it.
for (String table : t.list()) {
LOG.debug("Checking for out of bounds tablets for table {}", table);
String tablePoolName = getPoolNameForTable(table);
for (Entry<TServerInstance, TabletServerStatus> e : current.entrySet()) {
// pool names are the same as table names, except in the DEFAULT case.
// If this table is assigned to a pool for this host, then move on.
List<String> hostPools = getPoolNamesForHost(e.getKey().host());
if (hostPools.contains(tablePoolName)) {
continue;
}
String tid = tableIdMap.get(table);
if (null == tid) {
LOG.warn("Unable to check for out of bounds tablets for table {}, it may have been deleted or renamed.", table);
continue;
}
try {
List<TabletStats> outOfBoundsTablets = getOnlineTabletsForTable(e.getKey(), Table.ID.of(tid));
if (null == outOfBoundsTablets) {
continue;
}
Random random = new Random();
for (TabletStats ts : outOfBoundsTablets) {
KeyExtent ke = new KeyExtent(ts.getExtent());
if (migrations.contains(ke)) {
LOG.debug("Migration for out of bounds tablet {} has already been requested", ke);
continue;
}
String poolName = getPoolNameForTable(table);
SortedMap<TServerInstance, TabletServerStatus> currentView = currentGrouped.get(poolName);
if (null != currentView) {
int skip = random.nextInt(currentView.size());
Iterator<TServerInstance> iter = currentView.keySet().iterator();
for (int i = 0; i < skip; i++) {
iter.next();
}
TServerInstance nextTS = iter.next();
LOG.info("Tablet {} is currently outside the bounds of the regex, migrating from {} to {}", ke, e.getKey(), nextTS);
migrationsOut.add(new TabletMigration(ke, e.getKey(), nextTS));
if (migrationsOut.size() >= this.maxTServerMigrations) {
break;
}
} else {
LOG.warn("No tablet servers online for pool {}, unable to migrate out of bounds tablets", poolName);
}
}
} catch (TException e1) {
LOG.error("Error in OOB check getting tablets for table {} from server {}", tid, e.getKey().host(), e);
}
}
}
} finally {
// this could have taken a while...get a new time
this.lastOOBCheck = System.currentTimeMillis();
}
}
if (migrationsOut.size() > 0) {
LOG.warn("Not balancing tables due to moving {} out of bounds tablets", migrationsOut.size());
LOG.info("Migrating out of bounds tablets: {}", migrationsOut);
return minBalanceTime;
}
if (migrations != null && migrations.size() > 0) {
if (migrations.size() >= maxOutstandingMigrations) {
LOG.warn("Not balancing tables due to {} outstanding migrations", migrations.size());
if (LOG.isTraceEnabled()) {
LOG.trace("Sample up to 10 outstanding migrations: {}", Iterables.limit(migrations, 10));
}
return minBalanceTime;
}
LOG.debug("Current outstanding migrations of {} being applied", migrations.size());
if (LOG.isTraceEnabled()) {
LOG.trace("Sample up to 10 outstanding migrations: {}", Iterables.limit(migrations, 10));
}
migrationsFromLastPass.keySet().retainAll(migrations);
SortedMap<TServerInstance, TabletServerStatus> currentCopy = new TreeMap<>(current);
Multimap<TServerInstance, String> serverTableIdCopied = HashMultimap.create();
for (TabletMigration migration : migrationsFromLastPass.values()) {
TableInfo fromInfo = getTableInfo(currentCopy, serverTableIdCopied, migration.tablet.getTableId().toString(), migration.oldServer);
if (fromInfo != null) {
fromInfo.setOnlineTablets(fromInfo.getOnlineTablets() - 1);
}
TableInfo toInfo = getTableInfo(currentCopy, serverTableIdCopied, migration.tablet.getTableId().toString(), migration.newServer);
if (toInfo != null) {
toInfo.setOnlineTablets(toInfo.getOnlineTablets() + 1);
}
}
migrations = EMPTY_MIGRATIONS;
} else {
migrationsFromLastPass.clear();
}
for (String s : tableIdMap.values()) {
Table.ID tableId = Table.ID.of(s);
String tableName = tableIdToTableName.get(tableId);
String regexTableName = getPoolNameForTable(tableName);
SortedMap<TServerInstance, TabletServerStatus> currentView = currentGrouped.get(regexTableName);
if (null == currentView) {
LOG.warn("Skipping balance for table {} as no tablet servers are online.", tableName);
continue;
}
ArrayList<TabletMigration> newMigrations = new ArrayList<>();
getBalancerForTable(tableId).balance(currentView, migrations, newMigrations);
if (newMigrations.isEmpty()) {
tableToTimeSinceNoMigrations.remove(s);
} else if (tableToTimeSinceNoMigrations.containsKey(s)) {
if ((now - tableToTimeSinceNoMigrations.get(s)) > ONE_HOUR) {
LOG.warn("We have been consistently producing migrations for {}: {}", tableName, Iterables.limit(newMigrations, 10));
}
} else {
tableToTimeSinceNoMigrations.put(s, now);
}
migrationsOut.addAll(newMigrations);
if (migrationsOut.size() >= this.maxTServerMigrations) {
break;
}
}
for (TabletMigration migration : migrationsOut) {
migrationsFromLastPass.put(migration.tablet, migration);
}
LOG.info("Migrating tablets for balance: {}", migrationsOut);
return minBalanceTime;
}
use of org.apache.accumulo.server.master.state.TServerInstance in project accumulo by apache.
the class Master method gatherTableInformation.
private SortedMap<TServerInstance, TabletServerStatus> gatherTableInformation(Set<TServerInstance> currentServers) {
long start = System.currentTimeMillis();
int threads = Math.max(getConfiguration().getCount(Property.MASTER_STATUS_THREAD_POOL_SIZE), 1);
ExecutorService tp = Executors.newFixedThreadPool(threads);
final SortedMap<TServerInstance, TabletServerStatus> result = new TreeMap<>();
for (TServerInstance serverInstance : currentServers) {
final TServerInstance server = serverInstance;
tp.submit(new Runnable() {
@Override
public void run() {
try {
Thread t = Thread.currentThread();
String oldName = t.getName();
try {
t.setName("Getting status from " + server);
TServerConnection connection = tserverSet.getConnection(server);
if (connection == null)
throw new IOException("No connection to " + server);
TabletServerStatus status = connection.getTableMap(false);
result.put(server, status);
} finally {
t.setName(oldName);
}
} catch (Exception ex) {
log.error("unable to get tablet server status {} {}", server, ex.toString());
log.debug("unable to get tablet server status {}", server, ex);
if (badServers.get(server).incrementAndGet() > MAX_BAD_STATUS_COUNT) {
log.warn("attempting to stop {}", server);
try {
TServerConnection connection = tserverSet.getConnection(server);
if (connection != null) {
connection.halt(masterLock);
}
} catch (TTransportException e) {
// ignore: it's probably down
} catch (Exception e) {
log.info("error talking to troublesome tablet server", e);
}
badServers.remove(server);
}
}
}
});
}
tp.shutdown();
try {
tp.awaitTermination(getConfiguration().getTimeInMillis(Property.TSERV_CLIENT_TIMEOUT) * 2, TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
log.debug("Interrupted while fetching status");
}
synchronized (badServers) {
badServers.keySet().retainAll(currentServers);
badServers.keySet().removeAll(result.keySet());
}
log.debug(String.format("Finished gathering information from %d servers in %.2f seconds", result.size(), (System.currentTimeMillis() - start) / 1000.));
return result;
}
use of org.apache.accumulo.server.master.state.TServerInstance in project accumulo by apache.
the class Master method getGoalState.
TabletGoalState getGoalState(TabletLocationState tls, MergeInfo mergeInfo) {
KeyExtent extent = tls.extent;
// Shutting down?
TabletGoalState state = getSystemGoalState(tls);
if (state == TabletGoalState.HOSTED) {
if (tls.current != null && serversToShutdown.contains(tls.current)) {
return TabletGoalState.SUSPENDED;
}
// Handle merge transitions
if (mergeInfo.getExtent() != null) {
log.debug("mergeInfo overlaps: {} {}", extent, mergeInfo.overlaps(extent));
if (mergeInfo.overlaps(extent)) {
switch(mergeInfo.getState()) {
case NONE:
case COMPLETE:
break;
case STARTED:
case SPLITTING:
return TabletGoalState.HOSTED;
case WAITING_FOR_CHOPPED:
if (tls.getState(tserverSet.getCurrentServers()).equals(TabletState.HOSTED)) {
if (tls.chopped)
return TabletGoalState.UNASSIGNED;
} else {
if (tls.chopped && tls.walogs.isEmpty())
return TabletGoalState.UNASSIGNED;
}
return TabletGoalState.HOSTED;
case WAITING_FOR_OFFLINE:
case MERGING:
return TabletGoalState.UNASSIGNED;
}
}
}
// taking table offline?
state = getTableGoalState(extent);
if (state == TabletGoalState.HOSTED) {
// Maybe this tablet needs to be migrated
TServerInstance dest = migrations.get(extent);
if (dest != null && tls.current != null && !dest.equals(tls.current)) {
return TabletGoalState.UNASSIGNED;
}
}
}
return state;
}
Aggregations