use of org.apache.accumulo.server.master.state.Assignment in project accumulo by apache.
the class TabletGroupWatcher method run.
/**
 * Main loop of the watcher thread: repeatedly scans every tablet in {@code store} and nudges each
 * one toward its goal state for as long as {@code master.stillMaster()} returns true.
 *
 * <p>Each pass:
 * <ol>
 * <li>snapshots the in-progress merges and the current tablet servers,</li>
 * <li>walks the store, classifying each tablet by (goal, state) into the work lists
 * ({@code assignments}, {@code assigned}, {@code assignedToDeadServers},
 * {@code suspendedToGoneServers}, {@code unassigned}) or asking its server to unload it,</li>
 * <li>hands the work lists to {@code flushChanges}, reports state-count changes as events,
 * updates merge state, and then waits for the next event or timeout.</li>
 * </ol>
 *
 * <p>Any exception thrown during a pass is logged; if its cause is a
 * {@code BadLocationStateException} the metadata is repaired, otherwise the thread sleeps for
 * {@code Master.WAIT_BETWEEN_ERRORS} before the next pass. The store iterator is always closed.
 */
@Override
public void run() {
Thread.currentThread().setName("Watching " + store.name());
// Per-state tablet counts from the previous completed pass, used to report only changes.
int[] oldCounts = new int[TabletState.values().length];
EventCoordinator.Listener eventListener = this.master.nextEvent.getListener();
WalStateManager wals = new WalStateManager(master.getInstance(), ZooReaderWriter.getInstance());
while (this.master.stillMaster()) {
// slow things down a little, otherwise we spam the logs when there are many wake-up events
sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
// NOTE(review): masterState is not declared in this method, so this writes a variable from an
// enclosing scope (presumably a field); a local of the same name is declared inside the try
// block below and shadows it there.
masterState = master.getMasterState();
// totalUnloaded counts unload requests for the whole pass; unloaded is reset at each
// mid-scan flush and only feeds the work-chunk throttle.
int totalUnloaded = 0;
int unloaded = 0;
ClosableIterator<TabletLocationState> iter = null;
try {
// mergeStatsCache: one MergeStats per table seen during this pass.
// currentMerges: MergeStats for merges that have an extent, keyed by table id.
Map<Table.ID, MergeStats> mergeStatsCache = new HashMap<>();
Map<Table.ID, MergeStats> currentMerges = new HashMap<>();
for (MergeInfo merge : master.merges()) {
if (merge.getExtent() != null) {
currentMerges.put(merge.getExtent().getTableId(), new MergeStats(merge));
}
}
// Get the current status for the current list of tservers
SortedMap<TServerInstance, TabletServerStatus> currentTServers = new TreeMap<>();
for (TServerInstance entry : this.master.tserverSet.getCurrentServers()) {
currentTServers.put(entry, this.master.tserverStatus.get(entry));
}
// With no tablet servers there is nothing to do: wait, record an empty server set, retry.
if (currentTServers.size() == 0) {
eventListener.waitForEvents(Master.TIME_TO_WAIT_BETWEEN_SCANS);
synchronized (this) {
lastScanServers = ImmutableSortedSet.of();
}
continue;
}
// Don't move tablets to servers that are shutting down
SortedMap<TServerInstance, TabletServerStatus> destinations = new TreeMap<>(currentTServers);
destinations.keySet().removeAll(this.master.serversToShutdown);
// Work lists filled during the scan and consumed by flushChanges:
// assignments - new (tablet, server) pairs to record and send
// assigned - already-recorded assignments that get a reminder
// assignedToDeadServers - tablets whose server is no longer live
// suspendedToGoneServers - suspended tablets whose goal is not HOSTED
// unassigned - tablets needing a server, mapped to tls.getServer()
// logsForDeadServers - WALs in use per dead server
List<Assignment> assignments = new ArrayList<>();
List<Assignment> assigned = new ArrayList<>();
List<TabletLocationState> assignedToDeadServers = new ArrayList<>();
List<TabletLocationState> suspendedToGoneServers = new ArrayList<>();
Map<KeyExtent, TServerInstance> unassigned = new HashMap<>();
Map<TServerInstance, List<Path>> logsForDeadServers = new TreeMap<>();
// Local snapshot of the master state; this is the value passed to stats.end below.
MasterState masterState = master.getMasterState();
int[] counts = new int[TabletState.values().length];
stats.begin();
// Walk through the tablets in our store, and work tablets
// towards their goal
iter = store.iterator();
while (iter.hasNext()) {
TabletLocationState tls = iter.next();
if (tls == null) {
continue;
}
Master.log.debug("{} location State: {}", store.name(), tls);
// ignore entries for tables that do not exist in zookeeper
if (TableManager.getInstance().getTableState(tls.extent.getTableId()) == null)
continue;
if (Master.log.isTraceEnabled())
Master.log.trace("{} walogs {}", tls, tls.walogs.size());
// Don't overwhelm the tablet servers with work
// Once pending work exceeds MAX_TSERVER_WORK_CHUNK per current server, flush what has been
// collected, reset the work lists, and wait before continuing the scan.
// NOTE(review): logsForDeadServers is not cleared here, unlike the other collections.
if (unassigned.size() + unloaded > Master.MAX_TSERVER_WORK_CHUNK * currentTServers.size()) {
flushChanges(destinations, assignments, assigned, assignedToDeadServers, logsForDeadServers, suspendedToGoneServers, unassigned);
assignments.clear();
assigned.clear();
assignedToDeadServers.clear();
suspendedToGoneServers.clear();
unassigned.clear();
unloaded = 0;
eventListener.waitForEvents(Master.TIME_TO_WAIT_BETWEEN_SCANS);
}
Table.ID tableId = tls.extent.getTableId();
TableConfiguration tableConf = this.master.getConfigurationFactory().getTableConfiguration(tableId);
// Look up (or lazily create) the MergeStats for this table: use the active merge if one
// exists, otherwise a MergeStats wrapping a default-constructed MergeInfo.
MergeStats mergeStats = mergeStatsCache.get(tableId);
if (mergeStats == null) {
mergeStats = currentMerges.get(tableId);
if (mergeStats == null) {
mergeStats = new MergeStats(new MergeInfo());
}
mergeStatsCache.put(tableId, mergeStats);
}
TabletGoalState goal = this.master.getGoalState(tls, mergeStats.getMergeInfo());
TServerInstance server = tls.getServer();
// The state is evaluated against the full set of current servers, not just destinations.
TabletState state = tls.getState(currentTServers.keySet());
if (Master.log.isTraceEnabled()) {
Master.log.trace("Goal state {} current {} for {}", goal, state, tls.extent);
}
stats.update(tableId, state);
mergeStats.update(tls.extent, state, tls.chopped, !tls.walogs.isEmpty());
sendChopRequest(mergeStats.getMergeInfo(), state, tls);
sendSplitRequest(mergeStats.getMergeInfo(), state, tls);
// Always follow through with assignments
if (state == TabletState.ASSIGNED) {
goal = TabletGoalState.HOSTED;
}
// if we are shutting down all the tabletservers, we have to do it in order
// i.e. keep this tablet hosted while the dependent watcher still reports assigned or hosted
// tablets.
if (goal == TabletGoalState.SUSPENDED && state == TabletState.HOSTED) {
if (this.master.serversToShutdown.equals(currentTServers.keySet())) {
if (dependentWatcher != null && dependentWatcher.assignedOrHosted() > 0) {
goal = TabletGoalState.HOSTED;
}
}
}
if (goal == TabletGoalState.HOSTED) {
// A tablet that is not hosted and still has write-ahead logs goes through log recovery
// first; when recoverLogs returns true the tablet is skipped for this pass (and is not
// counted in counts[]).
if (state != TabletState.HOSTED && !tls.walogs.isEmpty()) {
if (this.master.recoveryManager.recoverLogs(tls.extent, tls.walogs))
continue;
}
switch(state) {
case HOSTED:
// Already hosted: if it is hosted on its migration target, the migration is done.
if (server.equals(this.master.migrations.get(tls.extent)))
this.master.migrations.remove(tls.extent);
break;
case ASSIGNED_TO_DEAD_SERVER:
// Queue for suspend/unassign, drop a migration that targeted the dead server, and
// remember that server's in-use WALs (looked up once per server).
assignedToDeadServers.add(tls);
if (server.equals(this.master.migrations.get(tls.extent)))
this.master.migrations.remove(tls.extent);
TServerInstance tserver = tls.futureOrCurrent();
if (!logsForDeadServers.containsKey(tserver)) {
logsForDeadServers.put(tserver, wals.getWalsInUse(tserver));
}
break;
case SUSPENDED:
// Still inside the table's suspend duration?
if (master.getSteadyTime() - tls.suspend.suspensionTime < tableConf.getTimeInMillis(Property.TABLE_SUSPEND_DURATION)) {
// Tablet is suspended. See if its tablet server is back.
TServerInstance returnInstance = null;
// Take the first destination at or after the probe key built from the suspended server's
// address and a " " session, and accept it only if its location matches that address.
Iterator<TServerInstance> find = destinations.tailMap(new TServerInstance(tls.suspend.server, " ")).keySet().iterator();
if (find.hasNext()) {
TServerInstance found = find.next();
if (found.getLocation().equals(tls.suspend.server)) {
returnInstance = found;
}
}
// Old tablet server is back. Return this tablet to its previous owner.
if (returnInstance != null) {
assignments.add(new Assignment(tls.extent, returnInstance));
} else {
// leave suspended, don't ask for a new assignment.
}
} else {
// Treat as unassigned, ask for a new assignment.
unassigned.put(tls.extent, server);
}
break;
case UNASSIGNED:
// maybe it's a finishing migration
TServerInstance dest = this.master.migrations.get(tls.extent);
if (dest != null) {
// if destination is still good, assign it
if (destinations.keySet().contains(dest)) {
assignments.add(new Assignment(tls.extent, dest));
} else {
// get rid of this migration
this.master.migrations.remove(tls.extent);
unassigned.put(tls.extent, server);
}
} else {
unassigned.put(tls.extent, server);
}
break;
case ASSIGNED:
// Send another reminder
assigned.add(new Assignment(tls.extent, tls.future));
break;
}
} else {
// Goal is something other than HOSTED.
switch(state) {
case SUSPENDED:
// Request a move to UNASSIGNED, so as to allow balancing to continue.
suspendedToGoneServers.add(tls);
cancelOfflineTableMigrations(tls);
break;
case UNASSIGNED:
cancelOfflineTableMigrations(tls);
break;
case ASSIGNED_TO_DEAD_SERVER:
// Same bookkeeping as in the HOSTED-goal branch, minus the migration cleanup.
assignedToDeadServers.add(tls);
if (!logsForDeadServers.containsKey(tls.futureOrCurrent())) {
logsForDeadServers.put(tls.futureOrCurrent(), wals.getWalsInUse(tls.futureOrCurrent()));
}
break;
case HOSTED:
// Hosted but should not be: ask the hosting server to unload it.
TServerConnection conn = this.master.tserverSet.getConnection(server);
if (conn != null) {
conn.unloadTablet(this.master.masterLock, tls.extent, goal.howUnload(), master.getSteadyTime());
unloaded++;
totalUnloaded++;
} else {
Master.log.warn("Could not connect to server {}", server);
}
break;
case ASSIGNED:
// Unreachable in practice: ASSIGNED tablets had their goal forced to HOSTED above.
break;
}
}
counts[state.ordinal()]++;
}
// Flush whatever work is left after the scan completes.
flushChanges(destinations, assignments, assigned, assignedToDeadServers, logsForDeadServers, suspendedToGoneServers, unassigned);
// provide stats after flushing changes to avoid race conditions w/ delete table
stats.end(masterState);
// Report changes
// Only non-zero counts that differ from the previous pass generate an event.
for (TabletState state : TabletState.values()) {
int i = state.ordinal();
if (counts[i] > 0 && counts[i] != oldCounts[i]) {
this.master.nextEvent.event("[%s]: %d tablets are %s", store.name(), counts[i], state.name());
}
}
Master.log.debug(String.format("[%s]: scan time %.2f seconds", store.name(), stats.getScanTime() / 1000.));
oldCounts = counts;
if (totalUnloaded > 0) {
this.master.nextEvent.event("[%s]: %d tablets unloaded", store.name(), totalUnloaded);
}
updateMergeState(mergeStatsCache);
synchronized (this) {
lastScanServers = ImmutableSortedSet.copyOf(currentTServers.keySet());
}
// If the server set changed while scanning, skip the wait and rescan immediately.
if (this.master.tserverSet.getCurrentServers().equals(currentTServers.keySet())) {
Master.log.debug(String.format("[%s] sleeping for %.2f seconds", store.name(), Master.TIME_TO_WAIT_BETWEEN_SCANS / 1000.));
eventListener.waitForEvents(Master.TIME_TO_WAIT_BETWEEN_SCANS);
} else {
Master.log.info("Detected change in current tserver set, re-running state machine.");
}
} catch (Exception ex) {
Master.log.error("Error processing table state for store " + store.name(), ex);
// A bad location state wrapped in the exception triggers a metadata repair for the
// affected row; any other failure just backs off before the next pass.
if (ex.getCause() != null && ex.getCause() instanceof BadLocationStateException) {
repairMetadata(((BadLocationStateException) ex.getCause()).getEncodedEndRow());
} else {
sleepUninterruptibly(Master.WAIT_BETWEEN_ERRORS, TimeUnit.MILLISECONDS);
}
} finally {
// iter stays null if the pass ended before store.iterator() was reached.
if (iter != null) {
try {
iter.close();
} catch (IOException ex) {
Master.log.warn("Error closing TabletLocationState iterator: " + ex, ex);
}
}
}
}
}
use of org.apache.accumulo.server.master.state.Assignment in project accumulo by apache.
the class RootTabletStateStoreTest method testRootTabletStateStore.
/**
 * Exercises {@code ZooTabletStateStore} backed by a {@code FakeZooStore}: setting a future
 * location, promoting it to the current location, unassigning it, and verifying that every
 * mutating call rejects an extent other than the root tablet with an
 * {@code IllegalArgumentException}.
 *
 * @throws DistributedStoreException if the underlying store fails
 */
@Test
public void testRootTabletStateStore() throws DistributedStoreException {
  ZooTabletStateStore tstore = new ZooTabletStateStore(new FakeZooStore());
  KeyExtent root = RootTable.EXTENT;
  String sessionId = "this is my unique session data";
  TServerInstance server = new TServerInstance(HostAndPort.fromParts("127.0.0.1", 10000), sessionId);
  List<Assignment> assignments = Collections.singletonList(new Assignment(root, server));

  // A future location only: the store should report exactly one tablet with future == server.
  tstore.setFutureLocations(assignments);
  int count = 0;
  for (TabletLocationState location : tstore) {
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure
    // messages read correctly.
    assertEquals(root, location.extent);
    assertEquals(server, location.future);
    assertNull(location.current);
    count++;
  }
  assertEquals(1, count);

  // Promote to a current location: future is cleared, current is set.
  tstore.setLocations(assignments);
  count = 0;
  for (TabletLocationState location : tstore) {
    assertEquals(root, location.extent);
    assertNull(location.future);
    assertEquals(server, location.current);
    count++;
  }
  assertEquals(1, count);

  // Unassign: the tablet is still listed but has neither a future nor a current location.
  TabletLocationState assigned = null;
  try {
    assigned = new TabletLocationState(root, server, null, null, null, null, false);
  } catch (BadLocationStateException e) {
    fail("Unexpected error " + e);
  }
  tstore.unassign(Collections.singletonList(assigned), null);
  count = 0;
  for (TabletLocationState location : tstore) {
    assertEquals(root, location.extent);
    assertNull(location.future);
    assertNull(location.current);
    count++;
  }
  assertEquals(1, count);

  // Every mutating operation must reject an extent that is not the root tablet.
  KeyExtent notRoot = new KeyExtent(Table.ID.of("0"), null, null);
  try {
    tstore.setLocations(Collections.singletonList(new Assignment(notRoot, server)));
    Assert.fail("should not get here");
  } catch (IllegalArgumentException expected) {
    // expected: the store only accepts the root tablet
  }
  try {
    tstore.setFutureLocations(Collections.singletonList(new Assignment(notRoot, server)));
    Assert.fail("should not get here");
  } catch (IllegalArgumentException expected) {
    // expected: the store only accepts the root tablet
  }
  TabletLocationState broken = null;
  try {
    broken = new TabletLocationState(notRoot, server, null, null, null, null, false);
  } catch (BadLocationStateException e) {
    fail("Unexpected error " + e);
  }
  try {
    tstore.unassign(Collections.singletonList(broken), null);
    Assert.fail("should not get here");
  } catch (IllegalArgumentException expected) {
    // expected: the store only accepts the root tablet
  }
}
use of org.apache.accumulo.server.master.state.Assignment in project accumulo by apache.
the class TabletGroupWatcher method flushChanges.
/**
 * Applies the work collected during a scan of the tablet store.
 *
 * <p>In order: tablets assigned to dead servers are suspended (when {@code canSuspendTablets()}
 * is true) or unassigned, and the dead servers' logs are marked closed; suspended tablets that
 * should no longer be suspended are unsuspended; the balancer is asked to place the
 * {@code unassigned} tablets on {@code currentTServers}; new assignments are recorded as future
 * locations; and finally an assign request is sent for every new assignment and every
 * already-recorded one in {@code assigned}.
 *
 * <p>Side effect: the entries of {@code assigned} are appended to {@code assignments}.
 *
 * @param currentTServers candidate servers tablets may be assigned to
 * @param assignments assignments decided so far; balancer results are appended to it
 * @param assigned already-recorded assignments that only need the request re-sent
 * @param assignedToDeadServers tablets whose server is no longer live
 * @param logsForDeadServers write-ahead logs in use, per dead server
 * @param suspendedToGoneServers suspended tablets to move back to unassigned
 * @param unassigned tablets needing a server, mapped to the server value recorded by the caller
 * @throws DistributedStoreException if updating the tablet state store fails
 * @throws TException if a request to a tablet server fails
 * @throws WalMarkerException if marking dead-server logs as closed fails
 */
private void flushChanges(SortedMap<TServerInstance, TabletServerStatus> currentTServers, List<Assignment> assignments, List<Assignment> assigned, List<TabletLocationState> assignedToDeadServers, Map<TServerInstance, List<Path>> logsForDeadServers, List<TabletLocationState> suspendedToGoneServers, Map<KeyExtent, TServerInstance> unassigned) throws DistributedStoreException, TException, WalMarkerException {
  boolean tabletsSuspendable = canSuspendTablets();
  if (!assignedToDeadServers.isEmpty()) {
    // Cap the number of tablets printed so the log line stays bounded.
    int maxServersToShow = min(assignedToDeadServers.size(), 100);
    Master.log.debug("{} assigned to dead servers: {}...", assignedToDeadServers.size(), assignedToDeadServers.subList(0, maxServersToShow));
    Master.log.debug("logs for dead servers: {}", logsForDeadServers);
    if (tabletsSuspendable) {
      store.suspend(assignedToDeadServers, logsForDeadServers, master.getSteadyTime());
    } else {
      store.unassign(assignedToDeadServers, logsForDeadServers);
    }
    this.master.markDeadServerLogsAsClosed(logsForDeadServers);
    // NOTE(review): the message says "suspended" even when the unassign branch ran above.
    this.master.nextEvent.event("Marked %d tablets as suspended because they don't have current servers", assignedToDeadServers.size());
  }
  if (!suspendedToGoneServers.isEmpty()) {
    // Report the list actually being unsuspended; this previously logged the size and
    // contents of assignedToDeadServers, which is unrelated to this branch.
    int maxServersToShow = min(suspendedToGoneServers.size(), 100);
    Master.log.debug("{} suspended to gone servers: {}...", suspendedToGoneServers.size(), suspendedToGoneServers.subList(0, maxServersToShow));
    store.unsuspend(suspendedToGoneServers);
  }
  if (!currentTServers.isEmpty()) {
    Map<KeyExtent, TServerInstance> assignedOut = new HashMap<>();
    final StringBuilder builder = new StringBuilder(64);
    // The balancer gets read-only views and writes its decisions into assignedOut.
    this.master.tabletBalancer.getAssignments(Collections.unmodifiableSortedMap(currentTServers), Collections.unmodifiableMap(unassigned), assignedOut);
    for (Entry<KeyExtent, TServerInstance> assignment : assignedOut.entrySet()) {
      if (unassigned.containsKey(assignment.getKey())) {
        if (assignment.getValue() != null) {
          if (!currentTServers.containsKey(assignment.getValue())) {
            Master.log.warn("balancer assigned {} to a tablet server that is not current {} ignoring", assignment.getKey(), assignment.getValue());
            continue;
          }
          if (builder.length() > 0) {
            builder.append(ASSIGNMENT_BUFFER_SEPARATOR);
          }
          builder.append(assignment);
          // Don't let the log message get too gigantic
          if (builder.length() > ASSINGMENT_BUFFER_MAX_LENGTH) {
            builder.append("]");
            Master.log.debug("{} assigning tablets: [{}", store.name(), builder.toString());
            builder.setLength(0);
          }
          assignments.add(new Assignment(assignment.getKey(), assignment.getValue()));
        }
      } else {
        Master.log.warn("{} load balancer assigning tablet that was not nominated for assignment {}", store.name(), assignment.getKey());
      }
    }
    if (builder.length() > 0) {
      // Make sure to log any leftover assignments
      builder.append("]");
      Master.log.debug("{} assigning tablets: [{}", store.name(), builder.toString());
    }
    if (!unassigned.isEmpty() && assignedOut.isEmpty()) {
      Master.log.warn("Load balancer failed to assign any tablets");
    }
  }
  if (!assignments.isEmpty()) {
    Master.log.info("Assigning {} tablets", assignments.size());
    store.setFutureLocations(assignments);
  }
  // Already-recorded assignments are not written to the store again, but their servers
  // still get a (repeat) assign request below.
  assignments.addAll(assigned);
  for (Assignment a : assignments) {
    TServerConnection conn = this.master.tserverSet.getConnection(a.server);
    if (conn != null) {
      conn.assignTablet(this.master.masterLock, a.tablet);
    } else {
      Master.log.warn("Could not connect to server {}", a.server);
    }
    master.assignedTablet(a.tablet);
  }
}
use of org.apache.accumulo.server.master.state.Assignment in project accumulo by apache.
the class MergeStateIT method test.
/**
 * Drives a merge of the range (e, p] on a fake table "t" through its states by editing the
 * metadata table directly and checking what {@code MergeStats.nextMergeState} reports after
 * each change.
 */
@Test
public void test() throws Exception {
  AccumuloServerContext serverContext = EasyMock.createMock(AccumuloServerContext.class);
  Connector conn = getConnector();
  EasyMock.expect(serverContext.getConnector()).andReturn(conn).anyTimes();
  EasyMock.replay(serverContext);
  conn.securityOperations().grantTablePermission(conn.whoami(), MetadataTable.NAME, TablePermission.WRITE);
  BatchWriter writer = conn.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig());

  // Populate metadata for table "t": one hosted, chopped tablet per split point.
  String[] splitPoints = { "a", "e", "j", "o", "t", "z" };
  Table.ID tableId = Table.ID.of("t");
  Text previousEndRow = null;
  for (String point : splitPoints) {
    Text endRow = new Text(point);
    KeyExtent extent = new KeyExtent(tableId, endRow, previousEndRow);
    Mutation tabletRow = KeyExtent.getPrevRowUpdateMutation(extent);
    tabletRow.put(TabletsSection.CurrentLocationColumnFamily.NAME, new Text("123456"), new Value("127.0.0.1:1234".getBytes()));
    ChoppedColumnFamily.CHOPPED_COLUMN.put(tabletRow, new Value("junk".getBytes()));
    writer.addMutation(tabletRow);
    previousEndRow = endRow;
  }
  // The last (default) tablet has no end row and is hosted but not marked chopped.
  KeyExtent lastExtent = new KeyExtent(tableId, null, previousEndRow);
  Mutation lastTablet = KeyExtent.getPrevRowUpdateMutation(lastExtent);
  lastTablet.put(TabletsSection.CurrentLocationColumnFamily.NAME, new Text("123456"), new Value("127.0.0.1:1234".getBytes()));
  writer.addMutation(lastTablet);
  writer.close();

  // Master-state stand-in carrying the merge request for (e, p].
  KeyExtent mergeRange = new KeyExtent(tableId, new Text("p"), new Text("e"));
  MockCurrentState state = new MockCurrentState(new MergeInfo(mergeRange, MergeInfo.Operation.MERGE));

  // With everything in a good state the store is expected to yield no tablets.
  MetaDataStateStore metaDataStateStore = new MetaDataStateStore(serverContext, state);
  int seen = 0;
  for (TabletLocationState entry : metaDataStateStore) {
    if (entry != null) {
      seen++;
    }
  }
  Assert.assertEquals(0, seen);

  // Punch a hole in the metadata by writing a half-finished split at the end of the range.
  KeyExtent splitting = new KeyExtent(tableId, new Text("t"), new Text("p"));
  Mutation mutation = splitting.getPrevRowUpdateMutation();
  TabletsSection.TabletColumnFamily.SPLIT_RATIO_COLUMN.put(mutation, new Value("0.5".getBytes()));
  TabletsSection.TabletColumnFamily.OLD_PREV_ROW_COLUMN.put(mutation, KeyExtent.encodePrevEndRow(new Text("o")));
  update(conn, mutation);

  // Tablets are still hosted, so the merge must wait for them to go offline.
  MergeStats stats = scan(state, metaDataStateStore);
  MergeState next = stats.nextMergeState(conn, state);
  Assert.assertEquals(MergeState.WAITING_FOR_OFFLINE, next);

  // Drop every current-location entry, i.e. unassign all tablets.
  BatchDeleter locationDeleter = conn.createBatchDeleter(MetadataTable.NAME, Authorizations.EMPTY, 1000, new BatchWriterConfig());
  locationDeleter.fetchColumnFamily(TabletsSection.CurrentLocationColumnFamily.NAME);
  locationDeleter.setRanges(Collections.singletonList(new Range()));
  locationDeleter.delete();

  // Everything is offline, but the metadata hole keeps the merge from advancing.
  stats = scan(state, metaDataStateStore);
  Assert.assertEquals(MergeState.WAITING_FOR_OFFLINE, stats.nextMergeState(conn, state));

  // Complete the split by adding the missing tablet (o, p] and bringing it online.
  KeyExtent tablet = new KeyExtent(tableId, new Text("p"), new Text("o"));
  mutation = tablet.getPrevRowUpdateMutation();
  TabletsSection.TabletColumnFamily.SPLIT_RATIO_COLUMN.put(mutation, new Value("0.5".getBytes()));
  update(conn, mutation);
  Assignment online = new Assignment(tablet, state.someTServer);
  metaDataStateStore.setLocations(Collections.singletonList(online));

  // The new tablet is hosted and unchopped, so the merge steps back to waiting for chops.
  stats = scan(state, metaDataStateStore);
  Assert.assertEquals(MergeState.WAITING_FOR_CHOPPED, stats.nextMergeState(conn, state));

  // Mark the new tablet as chopped.
  mutation = tablet.getPrevRowUpdateMutation();
  ChoppedColumnFamily.CHOPPED_COLUMN.put(mutation, new Value("junk".getBytes()));
  update(conn, mutation);
  stats = scan(state, metaDataStateStore);
  Assert.assertEquals(MergeState.WAITING_FOR_OFFLINE, stats.nextMergeState(conn, state));

  // Take the new tablet offline again.
  mutation = tablet.getPrevRowUpdateMutation();
  Collection<Collection<String>> walogs = Collections.emptyList();
  TabletLocationState hosted = new TabletLocationState(tablet, null, state.someTServer, null, null, walogs, false);
  metaDataStateStore.unassign(Collections.singletonList(hosted), null);

  // Everything in range is chopped and offline: the merge can proceed.
  stats = scan(state, metaDataStateStore);
  Assert.assertEquals(MergeState.MERGING, stats.nextMergeState(conn, state));
}
use of org.apache.accumulo.server.master.state.Assignment in project accumulo by apache.
the class NullTserver method main.
/**
 * Starts a Thrift tablet-server endpoint named "NullTServer", rewrites the metadata locations
 * of every tablet of {@code opts.tableName} to point at this process, and then sleeps forever.
 *
 * @param args command-line arguments parsed by {@code Opts}
 * @throws Exception on any failure during startup or the metadata update
 */
public static void main(String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs(NullTserver.class.getName(), args);

  // Set up the instance handles and server context.
  ClientConfiguration clientConf = ClientConfiguration.create().withInstance(opts.iname).withZkHosts(opts.keepers);
  ZooKeeperInstance zooInstance = new ZooKeeperInstance(clientConf);
  Instance hdfsInstance = HdfsZooInstance.getInstance();
  AccumuloServerContext context = new AccumuloServerContext(hdfsInstance, new ServerConfigurationFactory(zooInstance));

  // Start the Thrift service backed by ThriftClientHandler.
  TransactionWatcher txWatcher = new TransactionWatcher();
  AccumuloServerContext handlerContext = new AccumuloServerContext(hdfsInstance, new ServerConfigurationFactory(hdfsInstance));
  ThriftClientHandler handler = new ThriftClientHandler(handlerContext, txWatcher);
  Processor<Iface> thriftProcessor = new Processor<>(handler);
  HostAndPort bindAddress = HostAndPort.fromParts("0.0.0.0", opts.port);
  TServerUtils.startTServer(context.getConfiguration(), ThriftServerType.CUSTOM_HS_HA, thriftProcessor, "NullTServer", "null tserver", 2, 1, 1000, 10 * 1024 * 1024, null, null, -1, bindAddress);

  // The address recorded in metadata uses this host's name rather than the wildcard address.
  HostAndPort advertised = HostAndPort.fromParts(InetAddress.getLocalHost().getHostName(), opts.port);
  Table.ID tableId = Tables.getTableId(zooInstance, opts.tableName);

  // Collect one assignment per tablet of the table, all pointing at this server.
  Range metadataRange = new KeyExtent(tableId, null, null).toMetadataRange();
  List<Assignment> assignments = new ArrayList<>();
  try (MetaDataTableScanner scanner = new MetaDataTableScanner(context, metadataRange)) {
    // The port number doubles as the session id.
    long sessionId = opts.port;
    TServerInstance self = new TServerInstance(advertised, sessionId);
    while (scanner.hasNext()) {
      TabletLocationState tablet = scanner.next();
      assignments.add(new Assignment(tablet.extent, self));
    }
  }

  // Write the new locations to the metadata table.
  MetaDataStateStore stateStore = new MetaDataStateStore(context);
  stateStore.setLocations(assignments);

  // Keep the process alive.
  for (;;) {
    sleepUninterruptibly(10, TimeUnit.SECONDS);
  }
}
Aggregations