Search in sources :

Example 11 with TServerInstance

use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.

the class TabletGroupWatcher method cancelOfflineTableMigrations.

/**
 * Removes the migration entry recorded for {@code extent} when one exists and the extent's table
 * is in the {@link TableState#OFFLINE} state.
 *
 * @param extent
 *          the tablet extent whose migration entry is examined
 */
private void cancelOfflineTableMigrations(KeyExtent extent) {
    // Look up the migration target first, then the table state; both lookups always happen.
    final TServerInstance migrationTarget = manager.migrations.get(extent);
    final TableState currentState = manager.getTableManager().getTableState(extent.tableId());
    if (migrationTarget == null || currentState != TableState.OFFLINE) {
        // Either nothing is migrating or the table is not offline: leave the entry alone.
        return;
    }
    manager.migrations.remove(extent);
}
Also used : TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) TableState(org.apache.accumulo.core.manager.state.tables.TableState)

Example 12 with TServerInstance

use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.

the class TabletGroupWatcher method hostSuspendedTablet.

/**
 * Decides what to do with a suspended tablet.
 * <p>
 * If the time since suspension has reached the table's {@code TABLE_SUSPEND_DURATION}, the tablet
 * is recorded in {@code tLists.unassigned}. Otherwise the destinations are searched for an
 * instance with the same host and port as the suspended server; if one is found the tablet is
 * assigned back to it, and if not, nothing is recorded.
 *
 * @param tLists
 *          the lists of assignments, unassigned tablets and candidate destinations to update
 * @param tls
 *          location state of the suspended tablet
 * @param location
 *          value stored for the extent in {@code tLists.unassigned} when the suspension has expired
 * @param tableConf
 *          configuration used to read the suspend duration
 */
private void hostSuspendedTablet(TabletLists tLists, TabletLocationState tls, TServerInstance location, TableConfiguration tableConf) {
    long suspendedFor = manager.getSteadyTime() - tls.suspend.suspensionTime;
    long suspendLimit = tableConf.getTimeInMillis(Property.TABLE_SUSPEND_DURATION);
    if (suspendedFor >= suspendLimit) {
        // Suspension window is over: treat as unassigned and ask for a new assignment.
        tLists.unassigned.put(tls.extent, location);
        return;
    }

    // Still suspended. Probe the destinations, starting at an instance built from the suspended
    // server's address and a single-space session, and inspect only the first entry found.
    TServerInstance probe = new TServerInstance(tls.suspend.server, " ");
    Iterator<TServerInstance> candidates = tLists.destinations.tailMap(probe).keySet().iterator();
    if (!candidates.hasNext()) {
        // Tablet server is not back. Don't ask for a new assignment right now.
        return;
    }

    TServerInstance firstCandidate = candidates.next();
    if (firstCandidate.getHostAndPort().equals(tls.suspend.server)) {
        // Old tablet server is back. Return this tablet to its previous owner.
        tLists.assignments.add(new Assignment(tls.extent, firstCandidate));
    }
}
Also used : Assignment(org.apache.accumulo.server.manager.state.Assignment) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance)

Example 13 with TServerInstance

use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.

the class TabletGroupWatcher method repairMetadata.

/**
 * Attempts to repair an inconsistent location entry for the tablet identified by {@code row}.
 * <p>
 * The current-location and future-location columns of the row are read from the metadata table
 * (or the root table when the extent is a metadata extent). If the combination looks wrong
 * (both future and current present, multiple futures, or multiple currents), the first entry
 * whose value is not found in the live tablet server set is deleted. At most one entry is removed
 * per call. Any exception is logged and not propagated.
 *
 * @param row
 *          metadata row of the tablet to repair
 */
private void repairMetadata(Text row) {
    Manager.log.debug("Attempting repair on {}", row);
    // Attempt to find the dead server entry and remove it.
    try {
        Map<Key, Value> future = new HashMap<>();
        Map<Key, Value> assigned = new HashMap<>();
        KeyExtent extent = KeyExtent.fromMetaRow(row);
        String table = MetadataTable.NAME;
        if (extent.isMeta()) {
            table = RootTable.NAME;
        }
        // Close the scanner as soon as the location entries have been copied out.
        try (Scanner scanner = manager.getContext().createScanner(table, Authorizations.EMPTY)) {
            scanner.fetchColumnFamily(CurrentLocationColumnFamily.NAME);
            scanner.fetchColumnFamily(FutureLocationColumnFamily.NAME);
            scanner.setRange(new Range(row));
            for (Entry<Key, Value> entry : scanner) {
                if (entry.getKey().getColumnFamily().equals(CurrentLocationColumnFamily.NAME)) {
                    assigned.put(entry.getKey(), entry.getValue());
                } else if (entry.getKey().getColumnFamily().equals(FutureLocationColumnFamily.NAME)) {
                    future.put(entry.getKey(), entry.getValue());
                }
            }
        }
        if (!future.isEmpty() && !assigned.isEmpty()) {
            Manager.log.warn("Found a tablet assigned and hosted, attempting to repair");
        } else if (future.size() > 1 && assigned.isEmpty()) {
            Manager.log.warn("Found a tablet assigned to multiple servers, attempting to repair");
        } else if (future.isEmpty() && assigned.size() > 1) {
            Manager.log.warn("Found a tablet hosted on multiple servers, attempting to repair");
        } else {
            Manager.log.info("Attempted a repair, but nothing seems to be obviously wrong. {} {}", assigned, future);
            return;
        }
        // Future entries are examined before current ones.
        Iterator<Entry<Key, Value>> iter = Iterators.concat(future.entrySet().iterator(), assigned.entrySet().iterator());
        while (iter.hasNext()) {
            Entry<Key, Value> entry = iter.next();
            TServerInstance alive = manager.tserverSet.find(entry.getValue().toString());
            if (alive == null) {
                Manager.log.info("Removing entry  {}", entry);
                // try-with-resources guarantees the writer is closed even if addMutation throws.
                try (BatchWriter bw = manager.getContext().createBatchWriter(table)) {
                    Mutation m = new Mutation(entry.getKey().getRow());
                    m.putDelete(entry.getKey().getColumnFamily(), entry.getKey().getColumnQualifier());
                    bw.addMutation(m);
                }
                // Only one entry is removed per repair attempt.
                return;
            }
        }
        Manager.log.error("Metadata table is inconsistent at {} and all assigned/future tservers are still online.", row);
    } catch (Exception e) {
        Manager.log.error("Error attempting repair of metadata " + row + ": " + e, e);
    }
}
Also used : Scanner(org.apache.accumulo.core.client.Scanner) HashMap(java.util.HashMap) Range(org.apache.accumulo.core.data.Range) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) DistributedStoreException(org.apache.accumulo.server.manager.state.DistributedStoreException) MutationsRejectedException(org.apache.accumulo.core.client.MutationsRejectedException) NotServingTabletException(org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException) WalMarkerException(org.apache.accumulo.server.log.WalStateManager.WalMarkerException) BadLocationStateException(org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateException) TException(org.apache.thrift.TException) IOException(java.io.IOException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) Entry(java.util.Map.Entry) Value(org.apache.accumulo.core.data.Value) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)

Example 14 with TServerInstance

use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.

the class GarbageCollectWriteAheadLogs method removeEntriesInUse.

/**
 * Builds a map of WAL id to owning server from {@code candidates}, then removes the ids that are
 * still in use.
 * <p>
 * Ids are dropped (from both the result and {@code recoveryLogs}) when a tablet in the store is
 * assigned to a dead server or still references one of that server's WALs. For live servers, ids
 * whose state is not {@code UNREFERENCED} are dropped from the result, and all of the server's
 * ids are dropped from {@code recoveryLogs}. Note that {@code candidates} and
 * {@code recoveryLogs} are modified in place.
 *
 * @param candidates
 *          WAL ids grouped by server; entries are removed as servers are found to be in use
 * @param liveServers
 *          the servers considered alive
 * @param logsState
 *          state and path for each WAL id
 * @param recoveryLogs
 *          recovery logs keyed by WAL id; in-use ids are removed
 * @return map of WAL id to owning server for the ids that were not removed
 * @throws IllegalArgumentException
 *           if the same WAL id appears under more than one server in {@code candidates}
 */
private Map<UUID, TServerInstance> removeEntriesInUse(Map<TServerInstance, Set<UUID>> candidates, Set<TServerInstance> liveServers, Map<UUID, Pair<WalState, Path>> logsState, Map<UUID, Path> recoveryLogs) {
    // Invert the candidates: each WAL id must map back to exactly one server.
    Map<UUID, TServerInstance> walOwners = new HashMap<>();
    for (Entry<TServerInstance, Set<UUID>> serverWals : candidates.entrySet()) {
        TServerInstance owner = serverWals.getKey();
        for (UUID walId : serverWals.getValue()) {
            TServerInstance previousOwner = walOwners.put(walId, owner);
            if (previousOwner != null) {
                throw new IllegalArgumentException("WAL " + walId + " owned by multiple tservers");
            }
        }
    }

    // remove any entries if there's a log reference (recovery hasn't finished)
    for (TabletLocationState tablet : store) {
        // Easiest to just ignore all the WALs for the dead server.
        if (TabletState.ASSIGNED_TO_DEAD_SERVER == tablet.getState(liveServers)) {
            Set<UUID> deadServerWals = candidates.remove(tablet.current);
            if (deadServerWals != null) {
                walOwners.keySet().removeAll(deadServerWals);
                recoveryLogs.keySet().removeAll(deadServerWals);
            }
        }

        // The tablet still references WALs: drop every WAL of whichever server owns each one.
        for (Collection<String> walGroup : tablet.walogs) {
            for (String walPath : walGroup) {
                TServerInstance referencedOwner = walOwners.get(path2uuid(new Path(walPath)));
                // There's a reference to a log file, so skip that server's logs
                Set<UUID> referencedWals = candidates.remove(referencedOwner);
                if (referencedWals != null) {
                    walOwners.keySet().removeAll(referencedWals);
                    recoveryLogs.keySet().removeAll(referencedWals);
                }
            }
        }
    }

    // Remove OPEN and CLOSED logs for live servers: they are still in use
    for (TServerInstance live : liveServers) {
        Set<UUID> liveWals = candidates.get(live);
        if (liveWals == null) {
            // Server may not have any logs yet
            continue;
        }
        for (UUID walId : liveWals) {
            WalState walState = logsState.get(walId).getFirst();
            if (walState != WalState.UNREFERENCED) {
                walOwners.remove(walId);
            }
        }
        recoveryLogs.keySet().removeAll(liveWals);
    }
    return walOwners;
}
Also used : Path(org.apache.hadoop.fs.Path) HashSet(java.util.HashSet) LiveTServerSet(org.apache.accumulo.server.manager.LiveTServerSet) Set(java.util.Set) HashMap(java.util.HashMap) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) TabletLocationState(org.apache.accumulo.core.metadata.TabletLocationState) WalState(org.apache.accumulo.server.log.WalStateManager.WalState) UUID(java.util.UUID)

Example 15 with TServerInstance

use of org.apache.accumulo.core.metadata.TServerInstance in project accumulo by apache.

the class GarbageCollectWriteAheadLogs method getCurrent.

/**
 * Reads all log markers and records them in the supplied maps.
 *
 * @param logsByServer
 *          populated with, for each server that has markers, the set of its log ids
 * @param logState
 *          populated with the marker state looked up for each log id
 * @return total number of log ids listed across all servers
 * @throws Exception
 *           if reading the markers or a marker's state fails
 */
private long getCurrent(Map<TServerInstance, Set<UUID>> logsByServer, Map<UUID, Pair<WalState, Path>> logState) throws Exception {
    long total = 0;
    // get all the unused WALs in zookeeper
    for (Entry<TServerInstance, List<UUID>> serverMarkers : walMarker.getAllMarkers().entrySet()) {
        TServerInstance server = serverMarkers.getKey();
        List<UUID> walIds = serverMarkers.getValue();
        for (UUID walId : walIds) {
            logState.put(walId, walMarker.state(server, walId));
        }
        // Every listed id counts toward the total, even if the set below collapses duplicates.
        total += walIds.size();
        logsByServer.put(server, new HashSet<>(walIds));
    }
    return total;
}
Also used : List(java.util.List) UUID(java.util.UUID) TServerInstance(org.apache.accumulo.core.metadata.TServerInstance) HashSet(java.util.HashSet)

Aggregations

TServerInstance (org.apache.accumulo.core.metadata.TServerInstance)89 KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent)32 ArrayList (java.util.ArrayList)31 Test (org.junit.Test)30 HashMap (java.util.HashMap)21 ServerContext (org.apache.accumulo.server.ServerContext)18 TabletLocationState (org.apache.accumulo.core.metadata.TabletLocationState)17 HostAndPort (org.apache.accumulo.core.util.HostAndPort)14 HashSet (java.util.HashSet)13 TabletServerStatus (org.apache.accumulo.core.master.thrift.TabletServerStatus)13 TableId (org.apache.accumulo.core.data.TableId)12 AccumuloConfiguration (org.apache.accumulo.core.conf.AccumuloConfiguration)11 List (java.util.List)10 LiveTServerSet (org.apache.accumulo.server.manager.LiveTServerSet)10 TException (org.apache.thrift.TException)10 TreeMap (java.util.TreeMap)9 UUID (java.util.UUID)9 TreeSet (java.util.TreeSet)8 Key (org.apache.accumulo.core.data.Key)8 Value (org.apache.accumulo.core.data.Value)8