use of org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException in project accumulo by apache.
the class TabletStateChangeIterator method consume.
/**
 * Advances the source past every tablet whose state already matches what is expected, and stops
 * (leaving the source positioned on it) at the first tablet that should be reported.
 *
 * <p>
 * A tablet is reported when the iterator lacks the information needed to filter, when its row
 * cannot be decoded into a consistent location state, when its table has merge information, when
 * it is being migrated, or when its state disagrees with whether its table should be online.
 *
 * @throws IOException
 *           if reading or decoding the source fails
 */
@Override
protected void consume() throws IOException {
  while (getSource().hasTop()) {
    final Key topKey = getSource().getTopKey();
    final Value topValue = getSource().getTopValue();

    // filtering is only possible with a full picture of the manager's view
    final boolean canFilter =
        onlineTables != null && current != null && managerState == ManagerState.NORMAL;
    if (!canFilter) {
      return;
    }

    final TabletLocationState tabletState;
    try {
      tabletState = MetaDataTableScanner.createTabletLocationState(topKey, topValue);
    } catch (BadLocationStateException e) {
      // maybe the manager can do something with a tablet with bad/inconsistent state
      return;
    }
    if (tabletState == null) {
      return;
    }

    // we always want data about merges
    // could make this smarter by only returning if the tablet is involved in the merge
    if (merges.get(tabletState.extent.tableId()) != null) {
      return;
    }

    // always return the information for migrating tablets
    if (migrations.contains(tabletState.extent)) {
      return;
    }

    // is the table supposed to be online or offline?
    final boolean shouldBeOnline = onlineTables.contains(tabletState.extent.tableId());
    if (debug) {
      log.debug("{} is {} and should be {} line", tabletState.extent,
          tabletState.getState(current), (shouldBeOnline ? "on" : "off"));
    }

    final boolean needsAttention;
    switch (tabletState.getState(current)) {
      case ASSIGNED:
      case ASSIGNED_TO_DEAD_SERVER:
        // assigned tablets (to live or dead servers) are always reported
        needsAttention = true;
        break;
      case HOSTED:
        needsAttention = !shouldBeOnline;
        break;
      case SUSPENDED:
      case UNASSIGNED:
        needsAttention = shouldBeOnline;
        break;
      default:
        throw new AssertionError("Inconceivable! The tablet is an unrecognized state: "
            + tabletState.getState(current));
    }
    if (needsAttention) {
      return;
    }

    // table is in the expected state so don't bother returning any information about it
    getSource().next();
  }
}
use of org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException in project accumulo by apache.
the class UnloadTabletHandler method run.
/**
 * Unloads the tablet {@code extent} from this tablet server and reports the outcome to the
 * manager.
 *
 * <p>
 * Steps, in order:
 * <ol>
 * <li>If the tablet is still queued in {@code unopenedTablets}, it is simply removed.</li>
 * <li>If the tablet is currently opening, wait until it is no longer in
 * {@code openingTablets}.</li>
 * <li>If the tablet is not online, report {@code UNLOAD_FAILURE_NOT_SERVING} (unless it was
 * recently unloaded) and stop.</li>
 * <li>Close the tablet; on failure report {@code UNLOAD_ERROR} (unless it was already
 * closing/closed) and stop.</li>
 * <li>Remove the tablet from the online set, update the tablet state store (unassign or suspend),
 * report {@code UNLOADED} and save the tablet's stats.</li>
 * </ol>
 *
 * <p>
 * An interrupt received while waiting is not allowed to abort the unload, but it is remembered
 * and the thread's interrupt status is restored before this method returns.
 */
@Override
public void run() {
  synchronized (server.unopenedTablets) {
    if (server.unopenedTablets.contains(extent)) {
      server.unopenedTablets.remove(extent);
      // enqueueManagerMessage(new TabletUnloadedMessage(extent));
      return;
    }
  }

  // Set when an InterruptedException is caught; the flag is re-asserted in the finally block so
  // that the interrupt is not silently lost, without disturbing the unload itself.
  boolean interrupted = false;
  try {
    synchronized (server.openingTablets) {
      while (server.openingTablets.contains(extent)) {
        try {
          log.info("Waiting for tablet {} to finish opening before unloading.", extent);
          server.openingTablets.wait();
        } catch (InterruptedException e) {
          // keep waiting: the tablet must finish opening before it can be unloaded
          interrupted = true;
        }
      }
    }

    final Tablet t;
    synchronized (server.onlineTablets) {
      // single lookup against one snapshot; null means the tablet is not online
      t = server.onlineTablets.snapshot().get(extent);
    }

    if (t == null) {
      // unload request is crossing the successful unloaded message
      if (!server.recentlyUnloadedCache.containsKey(extent)) {
        log.info("told to unload tablet that was not being served {}", extent);
        server.enqueueManagerMessage(
            new TabletStatusMessage(TabletLoadState.UNLOAD_FAILURE_NOT_SERVING, extent));
      }
      return;
    }

    try {
      t.close(!goalState.equals(TUnloadTabletGoal.DELETED));
    } catch (Exception e) {
      if ((t.isClosing() || t.isClosed()) && e instanceof IllegalStateException) {
        log.debug("Failed to unload tablet {}... it was already closing or closed : {}", extent,
            e.getMessage());
      } else {
        log.error("Failed to close tablet {}... Aborting migration", extent, e);
        server.enqueueManagerMessage(
            new TabletStatusMessage(TabletLoadState.UNLOAD_ERROR, extent));
      }
      return;
    }

    // stop serving tablet - client will get not serving tablet
    // exceptions
    server.recentlyUnloadedCache.put(extent, System.currentTimeMillis());
    server.onlineTablets.remove(extent);

    try {
      TServerInstance instance =
          new TServerInstance(server.clientAddress, server.getLock().getSessionId());
      TabletLocationState tls =
          new TabletLocationState(extent, null, instance, null, null, null, false);
      if (!goalState.equals(TUnloadTabletGoal.SUSPENDED) || extent.isRootTablet()
          || (extent.isMeta()
              && !server.getConfiguration().getBoolean(Property.MANAGER_METADATA_SUSPENDABLE))) {
        TabletStateStore.unassign(server.getContext(), tls, null);
      } else {
        TabletStateStore.suspend(server.getContext(), tls, null,
            requestTimeSkew + NANOSECONDS.toMillis(System.nanoTime()));
      }
    } catch (BadLocationStateException e) {
      // without a location state there is nothing to hand to the state store, so skip the
      // update instead of passing null to it
      log.error("Unexpected error", e);
    } catch (DistributedStoreException ex) {
      log.warn("Unable to update storage", ex);
    } catch (KeeperException e) {
      log.warn("Unable determine our zookeeper session information", e);
    } catch (InterruptedException e) {
      log.warn("Interrupted while getting our zookeeper session information", e);
      interrupted = true;
    }

    // tell the manager how it went
    server.enqueueManagerMessage(new TabletStatusMessage(TabletLoadState.UNLOADED, extent));

    // roll tablet stats over into tablet server's statsKeeper object as
    // historical data
    server.statsKeeper.saveMajorMinorTimes(t.getTabletStats());
  } finally {
    if (interrupted) {
      Thread.currentThread().interrupt();
    }
  }
}
use of org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException in project accumulo by apache.
the class RootTabletStateStoreTest method testRootTabletStateStore.
/**
 * Exercises {@link ZooTabletStateStore} with the root tablet: setting a future location, setting
 * a current location and unassigning must each leave exactly one entry (the root extent) with the
 * expected locations, and every operation must reject a non-root extent with
 * {@link IllegalArgumentException}.
 */
@Test
public void testRootTabletStateStore() throws DistributedStoreException {
  ZooTabletStateStore tstore = new ZooTabletStateStore(new TestAmple());
  KeyExtent root = RootTable.EXTENT;
  String sessionId = "this is my unique session data";
  TServerInstance server =
      new TServerInstance(HostAndPort.fromParts("127.0.0.1", 10000), sessionId);
  List<Assignment> assignments = Collections.singletonList(new Assignment(root, server));

  // future location only
  tstore.setFutureLocations(assignments);
  assertSingleRootEntry(tstore, server, null);

  // current location replaces the future location
  tstore.setLocations(assignments);
  assertSingleRootEntry(tstore, null, server);

  // unassigning clears both locations
  TabletLocationState assigned = null;
  try {
    assigned = new TabletLocationState(root, server, null, null, null, null, false);
  } catch (BadLocationStateException e) {
    fail("Unexpected error " + e);
  }
  tstore.unassign(Collections.singletonList(assigned), null);
  assertSingleRootEntry(tstore, null, null);

  // anything other than the root extent must be rejected
  KeyExtent notRoot = new KeyExtent(TableId.of("0"), null, null);
  final var assignmentList = List.of(new Assignment(notRoot, server));
  assertThrows(IllegalArgumentException.class, () -> tstore.setLocations(assignmentList));
  assertThrows(IllegalArgumentException.class, () -> tstore.setFutureLocations(assignmentList));
  try {
    TabletLocationState broken =
        new TabletLocationState(notRoot, server, null, null, null, null, false);
    final var assignmentList1 = List.of(broken);
    assertThrows(IllegalArgumentException.class, () -> tstore.unassign(assignmentList1, null));
  } catch (BadLocationStateException e) {
    fail("Unexpected error " + e);
  }
}

/**
 * Asserts that the store holds exactly one entry, for the root extent, with the given future and
 * current locations ({@code null} meaning "no location expected").
 */
private static void assertSingleRootEntry(ZooTabletStateStore tstore,
    TServerInstance expectedFuture, TServerInstance expectedCurrent) {
  int count = 0;
  for (TabletLocationState location : tstore) {
    assertEquals(RootTable.EXTENT, location.extent);
    assertEquals(expectedFuture, location.future);
    assertEquals(expectedCurrent, location.current);
    count++;
  }
  assertEquals(1, count);
}
use of org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException in project accumulo by apache.
the class MetaDataTableScanner method createTabletLocationState.
/**
 * Builds a {@link TabletLocationState} from one whole-row-encoded metadata entry.
 *
 * <p>
 * The row is decoded with {@link WholeRowIterator#decodeRow} and each column is folded into the
 * result: future and current locations, write-ahead logs, the most recent last location, the
 * chopped marker, the prev-row column (which yields the extent) and the suspend column.
 *
 * @param k
 *          key of the encoded row
 * @param v
 *          value holding the encoded row
 * @return the location state described by the row
 * @throws IOException
 *           if the row cannot be decoded
 * @throws BadLocationStateException
 *           if the row has two future locations, two current locations, or no prev-row column;
 *           also propagated from the {@link TabletLocationState} constructor
 */
public static TabletLocationState createTabletLocationState(Key k, Value v)
    throws IOException, BadLocationStateException {
  final SortedMap<Key,Value> decodedRow = WholeRowIterator.decodeRow(k, v);
  KeyExtent extent = null;
  TServerInstance future = null;
  TServerInstance current = null;
  TServerInstance last = null;
  SuspendingTServer suspend = null;
  long lastTimestamp = 0;
  List<Collection<String>> walogs = new ArrayList<>();
  boolean chopped = false;

  for (Entry<Key,Value> entry : decodedRow.entrySet()) {
    Key key = entry.getKey();
    Text row = key.getRow();
    Text cf = key.getColumnFamily();
    Text cq = key.getColumnQualifier();

    if (cf.compareTo(FutureLocationColumnFamily.NAME) == 0) {
      TServerInstance location = new TServerInstance(entry.getValue(), cq);
      if (future != null) {
        throw new BadLocationStateException("found two assignments for the same extent " + row
            + ": " + future + " and " + location, row);
      }
      future = location;
    } else if (cf.compareTo(CurrentLocationColumnFamily.NAME) == 0) {
      TServerInstance location = new TServerInstance(entry.getValue(), cq);
      if (current != null) {
        throw new BadLocationStateException("found two locations for the same extent " + row
            + ": " + current + " and " + location, row);
      }
      current = location;
    } else if (cf.compareTo(LogColumnFamily.NAME) == 0) {
      // only the part before the first '|' is used; it is a ';'-separated list
      String[] split = entry.getValue().toString().split("\\|")[0].split(";");
      walogs.add(Arrays.asList(split));
    } else if (cf.compareTo(LastLocationColumnFamily.NAME) == 0) {
      // keep the entry with the greatest timestamp; the running maximum must be updated,
      // otherwise whichever later entry has a positive timestamp would win regardless of age
      long timestamp = key.getTimestamp();
      if (lastTimestamp < timestamp) {
        last = new TServerInstance(entry.getValue(), cq);
        lastTimestamp = timestamp;
      }
    } else if (cf.compareTo(ChoppedColumnFamily.NAME) == 0) {
      chopped = true;
    } else if (TabletColumnFamily.PREV_ROW_COLUMN.equals(cf, cq)) {
      extent = KeyExtent.fromMetaPrevRow(entry);
    } else if (SuspendLocationColumn.SUSPEND_COLUMN.equals(cf, cq)) {
      suspend = SuspendingTServer.fromValue(entry.getValue());
    }
  }

  if (extent == null) {
    String msg = "No prev-row for key extent " + decodedRow;
    log.error(msg);
    throw new BadLocationStateException(msg, k.getRow());
  }
  return new TabletLocationState(extent, future, current, last, suspend, walogs, chopped);
}
use of org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException in project accumulo by apache.
the class TabletGroupWatcher method run.
/**
 * Main loop of this watcher. While {@code manager.stillManager()} returns true it repeatedly
 * scans every tablet in {@code store}, determines each tablet's current state and goal state,
 * and queues or issues the action for that (state, goal) pair. After each scan it flushes the
 * queued changes, publishes per-state counts that changed since the previous scan, updates merge
 * state, and then waits for an event or timeout unless the set of tablet servers changed.
 *
 * <p>
 * Any exception during a scan is logged; if its cause is a {@link BadLocationStateException} the
 * metadata for the offending row is repaired, otherwise the loop sleeps before retrying. The
 * store iterator is always closed at the end of a pass.
 */
@Override
public void run() {
// per-state tablet counts from the previous scan, used below to report only changes
int[] oldCounts = new int[TabletState.values().length];
EventCoordinator.Listener eventListener = this.manager.nextEvent.getListener();
WalStateManager wals = new WalStateManager(manager.getContext());
while (manager.stillManager()) {
// slow things down a little, otherwise we spam the logs when there are many wake-up events
sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
// totalUnloaded counts unload requests for the whole scan; unloaded is reset at each throttle
int totalUnloaded = 0;
int unloaded = 0;
ClosableIterator<TabletLocationState> iter = null;
try {
// mergeStatsCache: stats per table seen during this scan (filled lazily below)
Map<TableId, MergeStats> mergeStatsCache = new HashMap<>();
// currentMerges: merges reported by the manager, keyed by table; merges without an extent are skipped
Map<TableId, MergeStats> currentMerges = new HashMap<>();
for (MergeInfo merge : manager.merges()) {
if (merge.getExtent() != null) {
currentMerges.put(merge.getExtent().tableId(), new MergeStats(merge));
}
}
// Get the current status for the current list of tservers
SortedMap<TServerInstance, TabletServerStatus> currentTServers = new TreeMap<>();
for (TServerInstance entry : manager.tserverSet.getCurrentServers()) {
currentTServers.put(entry, manager.tserverStatus.get(entry));
}
// with no tablet servers there is nothing to do: wait, record an empty server set, and rescan
if (currentTServers.isEmpty()) {
eventListener.waitForEvents(Manager.TIME_TO_WAIT_BETWEEN_SCANS);
synchronized (this) {
lastScanServers = Collections.emptySortedSet();
}
continue;
}
TabletLists tLists = new TabletLists(manager, currentTServers);
ManagerState managerState = manager.getManagerState();
// tablet counts for this scan, indexed by TabletState ordinal
int[] counts = new int[TabletState.values().length];
stats.begin();
// Walk through the tablets in our store, and work tablets
// towards their goal
iter = store.iterator();
while (iter.hasNext()) {
TabletLocationState tls = iter.next();
if (tls == null) {
continue;
}
// ignore entries for tables that do not exist in zookeeper
if (manager.getTableManager().getTableState(tls.extent.tableId()) == null)
continue;
// Don't overwhelm the tablet servers with work
// (flush what has been queued so far, start fresh lists, and wait before continuing the scan)
if (tLists.unassigned.size() + unloaded > Manager.MAX_TSERVER_WORK_CHUNK * currentTServers.size()) {
flushChanges(tLists, wals);
tLists.reset();
unloaded = 0;
eventListener.waitForEvents(Manager.TIME_TO_WAIT_BETWEEN_SCANS);
}
TableId tableId = tls.extent.tableId();
TableConfiguration tableConf = manager.getContext().getTableConfiguration(tableId);
// use the table's current merge stats if it has one, otherwise stats for an empty MergeInfo
MergeStats mergeStats = mergeStatsCache.computeIfAbsent(tableId, k -> {
var mStats = currentMerges.get(k);
return mStats != null ? mStats : new MergeStats(new MergeInfo());
});
TabletGoalState goal = manager.getGoalState(tls, mergeStats.getMergeInfo());
TServerInstance location = tls.getLocation();
TabletState state = tls.getState(currentTServers.keySet());
TabletLogger.missassigned(tls.extent, goal.toString(), state.toString(), tls.future, tls.current, tls.walogs.size());
stats.update(tableId, state);
mergeStats.update(tls.extent, state, tls.chopped, !tls.walogs.isEmpty());
sendChopRequest(mergeStats.getMergeInfo(), state, tls);
sendSplitRequest(mergeStats.getMergeInfo(), state, tls);
// Always follow through with assignments
if (state == TabletState.ASSIGNED) {
goal = TabletGoalState.HOSTED;
}
// if we are shutting down all the tabletservers, we have to do it in order
// (keep this tablet hosted while the dependent watcher still reports assigned or hosted tablets)
if ((goal == TabletGoalState.SUSPENDED && state == TabletState.HOSTED) && manager.serversToShutdown.equals(currentTServers.keySet())) {
if (dependentWatcher != null && dependentWatcher.assignedOrHosted() > 0) {
goal = TabletGoalState.HOSTED;
}
}
if (goal == TabletGoalState.HOSTED) {
// a not-yet-hosted tablet with write-ahead logs is handed to the recovery manager first; when
// recoverLogs returns true the tablet is skipped for this pass (and is not counted below)
// NOTE(review): presumably true means recovery is still in progress - confirm in RecoveryManager
if ((state != TabletState.HOSTED && !tls.walogs.isEmpty()) && manager.recoveryManager.recoverLogs(tls.extent, tls.walogs))
continue;
switch(state) {
case HOSTED:
// the tablet is hosted at the location recorded as its migration target, so drop the migration
if (location.equals(manager.migrations.get(tls.extent)))
manager.migrations.remove(tls.extent);
break;
case ASSIGNED_TO_DEAD_SERVER:
hostDeadTablet(tLists, tls, location, wals);
break;
case SUSPENDED:
hostSuspendedTablet(tLists, tls, location, tableConf);
break;
case UNASSIGNED:
hostUnassignedTablet(tLists, tls.extent, location);
break;
case ASSIGNED:
// Send another reminder
tLists.assigned.add(new Assignment(tls.extent, tls.future));
break;
}
} else {
// the goal is something other than HOSTED
switch(state) {
case SUSPENDED:
// Request a move to UNASSIGNED, so as to allow balancing to continue.
tLists.suspendedToGoneServers.add(tls);
cancelOfflineTableMigrations(tls.extent);
break;
case UNASSIGNED:
cancelOfflineTableMigrations(tls.extent);
break;
case ASSIGNED_TO_DEAD_SERVER:
unassignDeadTablet(tLists, tls, wals);
break;
case HOSTED:
// ask the hosting server to unload the tablet; only successful requests are counted
TServerConnection client = manager.tserverSet.getConnection(location);
if (client != null) {
client.unloadTablet(manager.managerLock, tls.extent, goal.howUnload(), manager.getSteadyTime());
unloaded++;
totalUnloaded++;
} else {
Manager.log.warn("Could not connect to server {}", location);
}
break;
case ASSIGNED:
// nothing to do here: the goal of an ASSIGNED tablet was forced to HOSTED above
break;
}
}
counts[state.ordinal()]++;
}
flushChanges(tLists, wals);
// provide stats after flushing changes to avoid race conditions w/ delete table
stats.end(managerState);
// Report changes
// (only non-zero counts that differ from the previous scan produce an event)
for (TabletState state : TabletState.values()) {
int i = state.ordinal();
if (counts[i] > 0 && counts[i] != oldCounts[i]) {
manager.nextEvent.event("[%s]: %d tablets are %s", store.name(), counts[i], state.name());
}
}
Manager.log.debug(String.format("[%s]: scan time %.2f seconds", store.name(), stats.getScanTime() / 1000.));
oldCounts = counts;
if (totalUnloaded > 0) {
manager.nextEvent.event("[%s]: %d tablets unloaded", store.name(), totalUnloaded);
}
updateMergeState(mergeStatsCache);
// publish the server set this scan was based on
synchronized (this) {
lastScanServers = ImmutableSortedSet.copyOf(currentTServers.keySet());
}
// wait before the next scan only if the tablet server set is unchanged; otherwise rescan right away
if (manager.tserverSet.getCurrentServers().equals(currentTServers.keySet())) {
Manager.log.debug(String.format("[%s] sleeping for %.2f seconds", store.name(), Manager.TIME_TO_WAIT_BETWEEN_SCANS / 1000.));
eventListener.waitForEvents(Manager.TIME_TO_WAIT_BETWEEN_SCANS);
} else {
Manager.log.info("Detected change in current tserver set, re-running state machine.");
}
} catch (Exception ex) {
Manager.log.error("Error processing table state for store " + store.name(), ex);
// a bad location state (as the direct cause) triggers a metadata repair for that row;
// any other failure just backs off before the next pass
if (ex.getCause() != null && ex.getCause() instanceof BadLocationStateException) {
repairMetadata(((BadLocationStateException) ex.getCause()).getEncodedEndRow());
} else {
sleepUninterruptibly(Manager.WAIT_BETWEEN_ERRORS, TimeUnit.MILLISECONDS);
}
} finally {
// always release the store iterator, whether the scan completed, was cut short, or failed
if (iter != null) {
try {
iter.close();
} catch (IOException ex) {
Manager.log.warn("Error closing TabletLocationState iterator: " + ex, ex);
}
}
}
}
}
Aggregations