Search in sources :

Example 1 with BadLocationStateException

use of org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException in project accumulo by apache.

the class TabletStateChangeIterator method consume.

/**
 * Advances the source past every tablet row that is already in its expected state, and stops
 * (leaving the source positioned on the row) at the first row the caller should see.
 *
 * <p>
 * A row is left as the top entry when any of the following holds: the iterator lacks the
 * information it filters with ({@code onlineTables} or {@code current} is null, or the manager
 * state is not {@code NORMAL}); the row cannot be turned into a {@link TabletLocationState}; the
 * tablet's table has an entry in {@code merges}; the tablet is in {@code migrations}; or the
 * tablet's state disagrees with whether its table is listed in {@code onlineTables}.
 *
 * @throws IOException declared by the row decoding / source advancing calls used here
 */
@Override
protected void consume() throws IOException {
    while (getSource().hasTop()) {
        Key topKey = getSource().getTopKey();
        Value topValue = getSource().getTopValue();

        // filtering is only attempted with full information and a manager in the NORMAL state
        boolean canFilter = onlineTables != null && current != null && managerState == ManagerState.NORMAL;
        if (!canFilter) {
            return;
        }

        TabletLocationState tls;
        try {
            tls = MetaDataTableScanner.createTabletLocationState(topKey, topValue);
        } catch (BadLocationStateException e) {
            // hand the bad/inconsistent row back; maybe the manager can do something with it
            return;
        }
        if (tls == null) {
            return;
        }

        // Rows for tables with a merge entry are always surfaced (this could be narrowed to the
        // tablets actually involved in the merge), and so are migrating tablets.
        boolean tableHasMerge = merges.get(tls.extent.tableId()) != null;
        if (tableHasMerge || migrations.contains(tls.extent)) {
            return;
        }

        // is the table supposed to be online or offline?
        boolean shouldBeOnline = onlineTables.contains(tls.extent.tableId());
        if (debug) {
            String desired = shouldBeOnline ? "on" : "off";
            log.debug("{} is {} and should be {} line", tls.extent, tls.getState(current), desired);
        }

        boolean inExpectedState;
        switch (tls.getState(current)) {
            case ASSIGNED:
            case ASSIGNED_TO_DEAD_SERVER:
                // assigned tablets (to live or dead servers) are always surfaced
                return;
            case HOSTED:
                inExpectedState = shouldBeOnline;
                break;
            case SUSPENDED:
            case UNASSIGNED:
                inExpectedState = !shouldBeOnline;
                break;
            default:
                throw new AssertionError("Inconceivable! The tablet is an unrecognized state: " + tls.getState(current));
        }
        if (!inExpectedState) {
            return;
        }

        // tablet is in the expected state, so skip it without returning any information
        getSource().next();
    }
}
Also used : Value(org.apache.accumulo.core.data.Value) TabletLocationState(org.apache.accumulo.core.metadata.TabletLocationState) Key(org.apache.accumulo.core.data.Key) BadLocationStateException(org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException)

Example 2 with BadLocationStateException

use of org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException in project accumulo by apache.

the class UnloadTabletHandler method run.

/**
 * Unloads the tablet identified by {@code extent} from this tablet server.
 *
 * <p>
 * Steps, in order: drop the extent from the unopened set if it is there (and stop); wait until
 * the extent is no longer in the opening set; look the tablet up among the online tablets and
 * report {@code UNLOAD_FAILURE_NOT_SERVING} if it is absent and not recently unloaded; close the
 * tablet, reporting {@code UNLOAD_ERROR} on an unexpected failure; remove it from the online
 * tablets; record the new location state (unassigned or suspended) in the tablet state store;
 * finally report {@code UNLOADED} to the manager and save the tablet's stats.
 */
@Override
public void run() {
    Tablet tablet = null;

    synchronized (server.unopenedTablets) {
        boolean neverOpened = server.unopenedTablets.contains(extent);
        if (neverOpened) {
            // the tablet was never opened: forget it; no message is sent to the manager here
            server.unopenedTablets.remove(extent);
            return;
        }
    }

    synchronized (server.openingTablets) {
        while (server.openingTablets.contains(extent)) {
            try {
                log.info("Waiting for tablet {} to finish opening before unloading.", extent);
                server.openingTablets.wait();
            } catch (InterruptedException ignored) {
                // NOTE(review): the interrupt is dropped and the loop simply re-checks the
                // opening set; the thread's interrupt status is not restored.
            }
        }
    }

    synchronized (server.onlineTablets) {
        if (server.onlineTablets.snapshot().containsKey(extent)) {
            tablet = server.onlineTablets.snapshot().get(extent);
        }
    }

    if (tablet == null) {
        // unload request is crossing the successful unloaded message
        boolean recentlyUnloaded = server.recentlyUnloadedCache.containsKey(extent);
        if (!recentlyUnloaded) {
            log.info("told to unload tablet that was not being served {}", extent);
            server.enqueueManagerMessage(new TabletStatusMessage(TabletLoadState.UNLOAD_FAILURE_NOT_SERVING, extent));
        }
        return;
    }

    try {
        // the flag passed to close() is false only when the goal is DELETED
        boolean notDeleted = !goalState.equals(TUnloadTabletGoal.DELETED);
        tablet.close(notDeleted);
    } catch (Exception e) {
        boolean alreadyClosingOrClosed = (tablet.isClosing() || tablet.isClosed()) && e instanceof IllegalStateException;
        if (alreadyClosingOrClosed) {
            log.debug("Failed to unload tablet {}... it was already closing or closed : {}", extent, e.getMessage());
        } else {
            log.error("Failed to close tablet {}... Aborting migration", extent, e);
            server.enqueueManagerMessage(new TabletStatusMessage(TabletLoadState.UNLOAD_ERROR, extent));
        }
        return;
    }

    // stop serving the tablet - clients will now get not-serving-tablet exceptions
    server.recentlyUnloadedCache.put(extent, System.currentTimeMillis());
    server.onlineTablets.remove(extent);

    try {
        TServerInstance thisServer = new TServerInstance(server.clientAddress, server.getLock().getSessionId());
        TabletLocationState tls = null;
        try {
            tls = new TabletLocationState(extent, null, thisServer, null, null, null, false);
        } catch (BadLocationStateException e) {
            // NOTE(review): tls stays null on this path and is still handed to the store below
            log.error("Unexpected error", e);
        }

        // Suspend only when asked to, and never for the root tablet; metadata tablets are
        // suspended only if MANAGER_METADATA_SUSPENDABLE is set. Everything else is unassigned.
        boolean suspend = goalState.equals(TUnloadTabletGoal.SUSPENDED)
            && !extent.isRootTablet()
            && (!extent.isMeta() || server.getConfiguration().getBoolean(Property.MANAGER_METADATA_SUSPENDABLE));
        if (suspend) {
            TabletStateStore.suspend(server.getContext(), tls, null, requestTimeSkew + NANOSECONDS.toMillis(System.nanoTime()));
        } else {
            TabletStateStore.unassign(server.getContext(), tls, null);
        }
    } catch (DistributedStoreException ex) {
        log.warn("Unable to update storage", ex);
    } catch (KeeperException e) {
        log.warn("Unable determine our zookeeper session information", e);
    } catch (InterruptedException e) {
        log.warn("Interrupted while getting our zookeeper session information", e);
    }

    // tell the manager how it went
    server.enqueueManagerMessage(new TabletStatusMessage(TabletLoadState.UNLOADED, extent));

    // roll the tablet's stats over into the tablet server's statsKeeper as historical data
    server.statsKeeper.saveMajorMinorTimes(tablet.getTabletStats());
}
Also used : TabletStatusMessage(org.apache.accumulo.tserver.managermessage.TabletStatusMessage) TabletLocationState(org.apache.accumulo.core.metadata.TabletLocationState) DistributedStoreException(org.apache.accumulo.server.manager.state.DistributedStoreException) Tablet(org.apache.accumulo.tserver.tablet.Tablet) BadLocationStateException(org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException) KeeperException(org.apache.zookeeper.KeeperException) DistributedStoreException(org.apache.accumulo.server.manager.state.DistributedStoreException) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) BadLocationStateException(org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException) KeeperException(org.apache.zookeeper.KeeperException)

Example 3 with BadLocationStateException

use of org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException in project accumulo by apache.

the class RootTabletStateStoreTest method testRootTabletStateStore.

/**
 * Exercises {@code ZooTabletStateStore} against the root tablet: setting a future location,
 * setting a current location, and unassigning must each leave exactly one entry in the store
 * with the expected future/current values. Operations on a non-root extent must be rejected
 * with {@link IllegalArgumentException}.
 *
 * <p>
 * All {@code assertEquals} calls pass the expected value first, as JUnit requires, so that a
 * failure message reports "expected" and "actual" the right way round.
 */
@Test
public void testRootTabletStateStore() throws DistributedStoreException {
    ZooTabletStateStore tstore = new ZooTabletStateStore(new TestAmple());
    KeyExtent root = RootTable.EXTENT;
    String sessionId = "this is my unique session data";
    TServerInstance server = new TServerInstance(HostAndPort.fromParts("127.0.0.1", 10000), sessionId);
    List<Assignment> assignments = Collections.singletonList(new Assignment(root, server));

    // after setting a future location: one entry, future set, no current location
    tstore.setFutureLocations(assignments);
    int count = 0;
    for (TabletLocationState location : tstore) {
        assertEquals(root, location.extent);
        assertEquals(server, location.future);
        assertNull(location.current);
        count++;
    }
    assertEquals(1, count);

    // after setting the location: one entry, current set, future cleared
    tstore.setLocations(assignments);
    count = 0;
    for (TabletLocationState location : tstore) {
        assertEquals(root, location.extent);
        assertNull(location.future);
        assertEquals(server, location.current);
        count++;
    }
    assertEquals(1, count);

    TabletLocationState assigned = null;
    try {
        assigned = new TabletLocationState(root, server, null, null, null, null, false);
    } catch (BadLocationStateException e) {
        fail("Unexpected error " + e);
    }

    // after unassigning: one entry, neither future nor current set
    tstore.unassign(Collections.singletonList(assigned), null);
    count = 0;
    for (TabletLocationState location : tstore) {
        assertEquals(root, location.extent);
        assertNull(location.future);
        assertNull(location.current);
        count++;
    }
    assertEquals(1, count);

    // every mutating operation must refuse an extent that is not the root tablet
    KeyExtent notRoot = new KeyExtent(TableId.of("0"), null, null);
    final var assignmentList = List.of(new Assignment(notRoot, server));
    assertThrows(IllegalArgumentException.class, () -> tstore.setLocations(assignmentList));
    assertThrows(IllegalArgumentException.class, () -> tstore.setFutureLocations(assignmentList));
    try {
        TabletLocationState broken = new TabletLocationState(notRoot, server, null, null, null, null, false);
        final var assignmentList1 = List.of(broken);
        assertThrows(IllegalArgumentException.class, () -> tstore.unassign(assignmentList1, null));
    } catch (BadLocationStateException e) {
        fail("Unexpected error " + e);
    }
}
Also used : TabletLocationState(org.apache.accumulo.core.metadata.TabletLocationState) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) BadLocationStateException(org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException) Test(org.junit.Test)

Example 4 with BadLocationStateException

use of org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException in project accumulo by apache.

the class MetaDataTableScanner method createTabletLocationState.

/**
 * Builds a {@link TabletLocationState} from one metadata row that was packed into a single
 * key/value pair and is unpacked here with {@code WholeRowIterator.decodeRow}.
 *
 * <p>
 * The decoded columns are mapped as follows: future-location and current-location columns become
 * the future and current server (at most one of each is allowed); each log column contributes one
 * list of write-ahead-log names; of the last-location columns, the one with the greatest timestamp
 * wins; the presence of a chopped column sets the chopped flag; the prev-row column yields the
 * extent; and the suspend column yields the suspending server.
 *
 * @param k key of the encoded row
 * @param v value of the encoded row
 * @return the location state described by the row
 * @throws IOException declared for the row decoding step
 * @throws BadLocationStateException if the row holds two future locations, two current
 *         locations, or no prev-row column from which to derive the extent
 */
public static TabletLocationState createTabletLocationState(Key k, Value v) throws IOException, BadLocationStateException {
    final SortedMap<Key, Value> decodedRow = WholeRowIterator.decodeRow(k, v);
    KeyExtent extent = null;
    TServerInstance future = null;
    TServerInstance current = null;
    TServerInstance last = null;
    SuspendingTServer suspend = null;
    long lastTimestamp = 0;
    List<Collection<String>> walogs = new ArrayList<>();
    boolean chopped = false;
    for (Entry<Key, Value> entry : decodedRow.entrySet()) {
        Key key = entry.getKey();
        Text row = key.getRow();
        Text cf = key.getColumnFamily();
        Text cq = key.getColumnQualifier();
        if (cf.compareTo(FutureLocationColumnFamily.NAME) == 0) {
            TServerInstance location = new TServerInstance(entry.getValue(), cq);
            if (future != null) {
                throw new BadLocationStateException("found two assignments for the same extent " + row + ": " + future + " and " + location, row);
            }
            future = location;
        } else if (cf.compareTo(CurrentLocationColumnFamily.NAME) == 0) {
            TServerInstance location = new TServerInstance(entry.getValue(), cq);
            if (current != null) {
                throw new BadLocationStateException("found two locations for the same extent " + row + ": " + current + " and " + location, row);
            }
            current = location;
        } else if (cf.compareTo(LogColumnFamily.NAME) == 0) {
            // only the part of the value before the first '|' is used; it is a ';'-separated list
            String[] split = entry.getValue().toString().split("\\|")[0].split(";");
            walogs.add(Arrays.asList(split));
        } else if (cf.compareTo(LastLocationColumnFamily.NAME) == 0) {
            // Keep the most recent last-location entry. The timestamp must be recorded along
            // with the server, otherwise every later entry with a positive timestamp would
            // replace a newer one seen earlier.
            long timestamp = key.getTimestamp();
            if (lastTimestamp < timestamp) {
                last = new TServerInstance(entry.getValue(), cq);
                lastTimestamp = timestamp;
            }
        } else if (cf.compareTo(ChoppedColumnFamily.NAME) == 0) {
            chopped = true;
        } else if (TabletColumnFamily.PREV_ROW_COLUMN.equals(cf, cq)) {
            extent = KeyExtent.fromMetaPrevRow(entry);
        } else if (SuspendLocationColumn.SUSPEND_COLUMN.equals(cf, cq)) {
            suspend = SuspendingTServer.fromValue(entry.getValue());
        }
    }
    if (extent == null) {
        // without a prev-row column there is no way to know which tablet this row describes
        String msg = "No prev-row for key extent " + decodedRow;
        log.error(msg);
        throw new BadLocationStateException(msg, k.getRow());
    }
    return new TabletLocationState(extent, future, current, last, suspend, walogs, chopped);
}
Also used : ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) BadLocationStateException(org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException) SuspendingTServer(org.apache.accumulo.core.metadata.SuspendingTServer) Value(org.apache.accumulo.core.data.Value) Collection(java.util.Collection) TabletLocationState(org.apache.accumulo.core.metadata.TabletLocationState) Key(org.apache.accumulo.core.data.Key)

Example 5 with BadLocationStateException

use of org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException in project accumulo by apache.

the class TabletGroupWatcher method run.

/**
 * Main loop of the watcher. For as long as {@code manager.stillManager()} returns true it
 * repeatedly walks every {@link TabletLocationState} in {@code store}, works out the tablet's
 * goal state and actual state, and queues or performs the action that moves the tablet toward
 * its goal. After each full pass it flushes the queued changes, reports per-state tablet counts
 * that changed since the previous pass, updates merge state, and then waits for the next event
 * (unless the set of tablet servers changed during the pass).
 *
 * <p>
 * Any exception aborts the current pass. If its cause is a {@link BadLocationStateException} the
 * offending row is handed to {@code repairMetadata}; otherwise the loop sleeps for
 * {@code Manager.WAIT_BETWEEN_ERRORS} before retrying. The store iterator is always closed.
 */
@Override
public void run() {
    // per-state tablet counts from the previous pass, indexed by TabletState ordinal
    int[] oldCounts = new int[TabletState.values().length];
    EventCoordinator.Listener eventListener = this.manager.nextEvent.getListener();
    WalStateManager wals = new WalStateManager(manager.getContext());
    while (manager.stillManager()) {
        // slow things down a little, otherwise we spam the logs when there are many wake-up events
        sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
        // totalUnloaded counts unload requests for the whole pass; unloaded is reset after each flush
        int totalUnloaded = 0;
        int unloaded = 0;
        ClosableIterator<TabletLocationState> iter = null;
        try {
            // mergeStatsCache holds one MergeStats per table seen in this pass;
            // currentMerges holds stats only for merges that have a non-null extent
            Map<TableId, MergeStats> mergeStatsCache = new HashMap<>();
            Map<TableId, MergeStats> currentMerges = new HashMap<>();
            for (MergeInfo merge : manager.merges()) {
                if (merge.getExtent() != null) {
                    currentMerges.put(merge.getExtent().tableId(), new MergeStats(merge));
                }
            }
            // Get the current status for the current list of tservers
            SortedMap<TServerInstance, TabletServerStatus> currentTServers = new TreeMap<>();
            for (TServerInstance entry : manager.tserverSet.getCurrentServers()) {
                currentTServers.put(entry, manager.tserverStatus.get(entry));
            }
            // with no tablet servers there is nothing to do: wait, record an empty scan set, start over
            if (currentTServers.isEmpty()) {
                eventListener.waitForEvents(Manager.TIME_TO_WAIT_BETWEEN_SCANS);
                synchronized (this) {
                    lastScanServers = Collections.emptySortedSet();
                }
                continue;
            }
            TabletLists tLists = new TabletLists(manager, currentTServers);
            ManagerState managerState = manager.getManagerState();
            int[] counts = new int[TabletState.values().length];
            stats.begin();
            // Walk through the tablets in our store, and work tablets
            // towards their goal
            iter = store.iterator();
            while (iter.hasNext()) {
                TabletLocationState tls = iter.next();
                if (tls == null) {
                    continue;
                }
                // ignore entries for tables that do not exist in zookeeper
                if (manager.getTableManager().getTableState(tls.extent.tableId()) == null)
                    continue;
                // Don't overwhelm the tablet servers with work
                // (once the pending work exceeds the per-server chunk size, flush it and wait before continuing)
                if (tLists.unassigned.size() + unloaded > Manager.MAX_TSERVER_WORK_CHUNK * currentTServers.size()) {
                    flushChanges(tLists, wals);
                    tLists.reset();
                    unloaded = 0;
                    eventListener.waitForEvents(Manager.TIME_TO_WAIT_BETWEEN_SCANS);
                }
                TableId tableId = tls.extent.tableId();
                TableConfiguration tableConf = manager.getContext().getTableConfiguration(tableId);
                // use the stats of the table's active merge if there is one, otherwise stats for an empty MergeInfo
                MergeStats mergeStats = mergeStatsCache.computeIfAbsent(tableId, k -> {
                    var mStats = currentMerges.get(k);
                    return mStats != null ? mStats : new MergeStats(new MergeInfo());
                });
                TabletGoalState goal = manager.getGoalState(tls, mergeStats.getMergeInfo());
                TServerInstance location = tls.getLocation();
                TabletState state = tls.getState(currentTServers.keySet());
                TabletLogger.missassigned(tls.extent, goal.toString(), state.toString(), tls.future, tls.current, tls.walogs.size());
                stats.update(tableId, state);
                mergeStats.update(tls.extent, state, tls.chopped, !tls.walogs.isEmpty());
                sendChopRequest(mergeStats.getMergeInfo(), state, tls);
                sendSplitRequest(mergeStats.getMergeInfo(), state, tls);
                // Always follow through with assignments
                if (state == TabletState.ASSIGNED) {
                    goal = TabletGoalState.HOSTED;
                }
                // if we are shutting down all the tabletservers, we have to do it in order
                // (keep this tablet hosted while the dependent watcher still reports assigned or hosted tablets)
                if ((goal == TabletGoalState.SUSPENDED && state == TabletState.HOSTED) && manager.serversToShutdown.equals(currentTServers.keySet())) {
                    if (dependentWatcher != null && dependentWatcher.assignedOrHosted() > 0) {
                        goal = TabletGoalState.HOSTED;
                    }
                }
                if (goal == TabletGoalState.HOSTED) {
                    // A tablet that is not hosted and has write-ahead logs goes through recoverLogs first.
                    // NOTE(review): a true result presumably means recovery is still in progress - confirm.
                    // This continue also skips the counts[] update at the bottom of the loop.
                    if ((state != TabletState.HOSTED && !tls.walogs.isEmpty()) && manager.recoveryManager.recoverLogs(tls.extent, tls.walogs))
                        continue;
                    switch(state) {
                        case HOSTED:
                            // the tablet is at the location recorded for its migration, so drop the migration entry
                            if (location.equals(manager.migrations.get(tls.extent)))
                                manager.migrations.remove(tls.extent);
                            break;
                        case ASSIGNED_TO_DEAD_SERVER:
                            hostDeadTablet(tLists, tls, location, wals);
                            break;
                        case SUSPENDED:
                            hostSuspendedTablet(tLists, tls, location, tableConf);
                            break;
                        case UNASSIGNED:
                            hostUnassignedTablet(tLists, tls.extent, location);
                            break;
                        case ASSIGNED:
                            // Send another reminder
                            tLists.assigned.add(new Assignment(tls.extent, tls.future));
                            break;
                    }
                } else {
                    // the goal is something other than HOSTED
                    switch(state) {
                        case SUSPENDED:
                            // Request a move to UNASSIGNED, so as to allow balancing to continue.
                            tLists.suspendedToGoneServers.add(tls);
                            cancelOfflineTableMigrations(tls.extent);
                            break;
                        case UNASSIGNED:
                            cancelOfflineTableMigrations(tls.extent);
                            break;
                        case ASSIGNED_TO_DEAD_SERVER:
                            unassignDeadTablet(tLists, tls, wals);
                            break;
                        case HOSTED:
                            // ask the hosting server to unload the tablet; only successful requests are counted
                            TServerConnection client = manager.tserverSet.getConnection(location);
                            if (client != null) {
                                client.unloadTablet(manager.managerLock, tls.extent, goal.howUnload(), manager.getSteadyTime());
                                unloaded++;
                                totalUnloaded++;
                            } else {
                                Manager.log.warn("Could not connect to server {}", location);
                            }
                            break;
                        case ASSIGNED:
                            break;
                    }
                }
                counts[state.ordinal()]++;
            }
            flushChanges(tLists, wals);
            // provide stats after flushing changes to avoid race conditions w/ delete table
            stats.end(managerState);
            // Report changes
            // (only states with a non-zero count that differs from the previous pass)
            for (TabletState state : TabletState.values()) {
                int i = state.ordinal();
                if (counts[i] > 0 && counts[i] != oldCounts[i]) {
                    manager.nextEvent.event("[%s]: %d tablets are %s", store.name(), counts[i], state.name());
                }
            }
            Manager.log.debug(String.format("[%s]: scan time %.2f seconds", store.name(), stats.getScanTime() / 1000.));
            oldCounts = counts;
            if (totalUnloaded > 0) {
                manager.nextEvent.event("[%s]: %d tablets unloaded", store.name(), totalUnloaded);
            }
            updateMergeState(mergeStatsCache);
            // remember which servers this pass was based on
            synchronized (this) {
                lastScanServers = ImmutableSortedSet.copyOf(currentTServers.keySet());
            }
            // wait for the next event only if the server set is unchanged; otherwise run another pass right away
            if (manager.tserverSet.getCurrentServers().equals(currentTServers.keySet())) {
                Manager.log.debug(String.format("[%s] sleeping for %.2f seconds", store.name(), Manager.TIME_TO_WAIT_BETWEEN_SCANS / 1000.));
                eventListener.waitForEvents(Manager.TIME_TO_WAIT_BETWEEN_SCANS);
            } else {
                Manager.log.info("Detected change in current tserver set, re-running state machine.");
            }
        } catch (Exception ex) {
            Manager.log.error("Error processing table state for store " + store.name(), ex);
            // a bad location state is repaired immediately; any other failure backs off before the next pass
            if (ex.getCause() != null && ex.getCause() instanceof BadLocationStateException) {
                repairMetadata(((BadLocationStateException) ex.getCause()).getEncodedEndRow());
            } else {
                sleepUninterruptibly(Manager.WAIT_BETWEEN_ERRORS, TimeUnit.MILLISECONDS);
            }
        } finally {
            // iter is null when the pass ended before the store iterator was opened
            if (iter != null) {
                try {
                    iter.close();
                } catch (IOException ex) {
                    Manager.log.warn("Error closing TabletLocationState iterator: " + ex, ex);
                }
            }
        }
    }
}
Also used : TableId(org.apache.accumulo.core.data.TableId) MergeInfo(org.apache.accumulo.server.manager.state.MergeInfo) HashMap(java.util.HashMap) TabletGoalState(org.apache.accumulo.manager.Manager.TabletGoalState) BadLocationStateException(org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException) Assignment(org.apache.accumulo.server.manager.state.Assignment) ManagerState(org.apache.accumulo.core.manager.thrift.ManagerState) TabletLocationState(org.apache.accumulo.core.metadata.TabletLocationState) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus) TableConfiguration(org.apache.accumulo.server.conf.TableConfiguration) IOException(java.io.IOException) TreeMap(java.util.TreeMap) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) DistributedStoreException(org.apache.accumulo.server.manager.state.DistributedStoreException) MutationsRejectedException(org.apache.accumulo.core.client.MutationsRejectedException) NotServingTabletException(org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException) WalMarkerException(org.apache.accumulo.server.log.WalStateManager.WalMarkerException) BadLocationStateException(org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException) TException(org.apache.thrift.TException) IOException(java.io.IOException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) TServerConnection(org.apache.accumulo.server.manager.LiveTServerSet.TServerConnection) TabletState(org.apache.accumulo.core.metadata.TabletState) WalStateManager(org.apache.accumulo.server.log.WalStateManager) MergeStats(org.apache.accumulo.manager.state.MergeStats)

Aggregations

TabletLocationState (org.apache.accumulo.core.metadata.TabletLocationState)6 BadLocationStateException (org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException)6 TServerInstance (org.apache.accumulo.core.metadata.TServerInstance)4 Key (org.apache.accumulo.core.data.Key)3 Value (org.apache.accumulo.core.data.Value)3 KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent)3 TableId (org.apache.accumulo.core.data.TableId)2 DistributedStoreException (org.apache.accumulo.server.manager.state.DistributedStoreException)2 Text (org.apache.hadoop.io.Text)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 HashMap (java.util.HashMap)1 TreeMap (java.util.TreeMap)1 AccumuloException (org.apache.accumulo.core.client.AccumuloException)1 MutationsRejectedException (org.apache.accumulo.core.client.MutationsRejectedException)1 Scanner (org.apache.accumulo.core.client.Scanner)1 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)1 Range (org.apache.accumulo.core.data.Range)1 ManagerState (org.apache.accumulo.core.manager.thrift.ManagerState)1