Search in sources :

Example 36 with TServerInstance

use of org.apache.accumulo.server.master.state.TServerInstance in project accumulo by apache.

the class FindOfflineTablets method findOffline.

/**
 * Checks tablet location state in three stages: ZooKeeper, then the root table, then the
 * metadata table. Each stage is handed to {@code checkTablets}; the first stage that yields a
 * positive value ends the search and that value is returned.
 *
 * @param context
 *          client context used to build the live tablet server set and the table scanners
 * @param tableName
 *          table to restrict the check to. If it equals {@code RootTable.NAME} only ZooKeeper is
 *          checked; if it equals {@code MetadataTable.NAME} ZooKeeper and the root table are
 *          checked. May be {@code null}, in which case the whole tablets section of the metadata
 *          table is scanned in the last stage.
 * @return the first positive value produced by {@code checkTablets}, {@code 0} when the requested
 *         table is the root or metadata table and the earlier stages were clean, otherwise the
 *         value produced for the metadata table scan
 * @throws AccumuloException
 *           if the ZooKeeper tablet state store cannot be iterated
 * @throws TableNotFoundException
 *           declared by the signature; presumably raised when {@code tableName} cannot be resolved
 *           to a table id (confirm against {@code Tables.getTableId})
 */
static int findOffline(ClientContext context, String tableName) throws AccumuloException, TableNotFoundException {
    // Flipped to true once scanning begins; it is never reset inside this method.
    final AtomicBoolean scanning = new AtomicBoolean(false);
    LiveTServerSet tservers = new LiveTServerSet(context, new Listener() {

        @Override
        public void update(LiveTServerSet current, Set<TServerInstance> deleted, Set<TServerInstance> added) {
            // Membership changes during a scan are only reported, not acted upon.
            if (!deleted.isEmpty() && scanning.get())
                log.warn("Tablet servers deleted while scanning: {}", deleted);
            if (!added.isEmpty() && scanning.get())
                log.warn("Tablet servers added while scanning: {}", added);
        }
    });
    tservers.startListeningForTabletServerChanges();
    scanning.set(true);
    Iterator<TabletLocationState> zooScanner;
    try {
        zooScanner = new ZooTabletStateStore().iterator();
    } catch (DistributedStoreException e) {
        throw new AccumuloException(e);
    }
    int offline;
    System.out.println("Scanning zookeeper");
    if ((offline = checkTablets(zooScanner, tservers)) > 0)
        return offline;
    if (RootTable.NAME.equals(tableName))
        return 0;
    System.out.println("Scanning " + RootTable.NAME);
    // The root-table scanner is closed here in the same way as the metadata-table scanner below;
    // previously it was left open on every path.
    try (MetaDataTableScanner rootScanner = new MetaDataTableScanner(context, MetadataSchema.TabletsSection.getRange(), RootTable.NAME)) {
        offline = checkTablets(rootScanner, tservers);
    }
    if (offline > 0)
        return offline;
    if (MetadataTable.NAME.equals(tableName))
        return 0;
    System.out.println("Scanning " + MetadataTable.NAME);
    Range range = MetadataSchema.TabletsSection.getRange();
    if (tableName != null) {
        // Narrow the metadata scan to the rows describing the requested table.
        Table.ID tableId = Tables.getTableId(context.getInstance(), tableName);
        range = new KeyExtent(tableId, null, null).toMetadataRange();
    }
    try (MetaDataTableScanner metaScanner = new MetaDataTableScanner(context, range, MetadataTable.NAME)) {
        return checkTablets(metaScanner, tservers);
    }
}
Also used : AccumuloException(org.apache.accumulo.core.client.AccumuloException) Listener(org.apache.accumulo.server.master.LiveTServerSet.Listener) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) RootTable(org.apache.accumulo.core.metadata.RootTable) Table(org.apache.accumulo.core.client.impl.Table) DistributedStoreException(org.apache.accumulo.server.master.state.DistributedStoreException) Range(org.apache.accumulo.core.data.Range) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) TServerInstance(org.apache.accumulo.server.master.state.TServerInstance) LiveTServerSet(org.apache.accumulo.server.master.LiveTServerSet) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) MetaDataTableScanner(org.apache.accumulo.server.master.state.MetaDataTableScanner) TabletLocationState(org.apache.accumulo.server.master.state.TabletLocationState) ZooTabletStateStore(org.apache.accumulo.server.master.state.ZooTabletStateStore)

Example 37 with TServerInstance

use of org.apache.accumulo.server.master.state.TServerInstance in project accumulo by apache.

the class MasterRepairsDualAssignmentIT method test.

@Test
public void test() throws Exception {
    // Create a table with many splits so that its tablets get spread over the tablet servers.
    final Connector conn = getConnector();
    final ClientContext context = new ClientContext(conn.getInstance(), new Credentials("root", new PasswordToken(ROOT_PASSWORD)), getClientConfig());
    final String table = this.getUniqueNames(1)[0];
    conn.securityOperations().grantTablePermission("root", MetadataTable.NAME, TablePermission.WRITE);
    conn.securityOperations().grantTablePermission("root", RootTable.NAME, TablePermission.WRITE);
    conn.tableOperations().create(table);
    final SortedSet<Text> splits = new TreeSet<>();
    final String[] letters = "a b c d e f g h i j k l m n o p q r s t u v w x y z".split(" ");
    for (int i = 0; i < letters.length; i++) {
        splits.add(new Text(letters[i]));
    }
    conn.tableOperations().addSplits(table, splits);

    // Poll the metadata store until tablets are seen on two distinct servers, remembering the
    // location states observed during the latest pass.
    final Set<TServerInstance> states = new HashSet<>();
    final Set<TabletLocationState> oldLocations = new HashSet<>();
    final MetaDataStateStore store = new MetaDataStateStore(context, null);
    do {
        UtilWaitThread.sleep(250);
        oldLocations.clear();
        for (TabletLocationState tls : store) {
            if (tls.current == null) {
                continue;
            }
            states.add(tls.current);
            oldLocations.add(tls);
        }
    } while (states.size() < 2);
    assertEquals(2, states.size());

    // Kill an arbitrary tablet server, then wait until every tablet has a location again.
    cluster.killProcess(ServerType.TABLET_SERVER, cluster.getProcesses().get(ServerType.TABLET_SERVER).iterator().next());
    final Set<TServerInstance> replStates = new HashSet<>();
    boolean settled;
    do {
        UtilWaitThread.sleep(1000);
        states.clear();
        replStates.clear();
        boolean allAssigned = true;
        for (TabletLocationState tls : store) {
            if (tls == null) {
                allAssigned = false;
            } else if (tls.current != null) {
                states.add(tls.current);
            } else if (tls.extent.equals(new KeyExtent(ReplicationTable.ID, null, null))) {
                // The unassigned replication-table tablet is tracked separately and does not count
                // against allAssigned.
                replStates.add(tls.current);
            } else {
                allAssigned = false;
            }
        }
        System.out.println(states + " size " + states.size() + " allAssigned " + allAssigned);
        settled = states.size() != 2 && allAssigned;
    } while (!settled);
    assertEquals(1, replStates.size());
    assertEquals(1, states.size());

    // Choose a previously seen location whose server is no longer among the live ones; when
    // several qualify, the last one iterated is kept.
    TabletLocationState moved = null;
    for (TabletLocationState candidate : oldLocations) {
        if (states.contains(candidate.current)) {
            continue;
        }
        moved = candidate;
    }
    assertNotEquals(null, moved);

    // Write a location entry for that tablet as if the dead server still hosted it.
    BatchWriter writer = conn.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig());
    Mutation bogusLocation = new Mutation(moved.extent.getMetadataEntry());
    moved.current.putLocation(bogusLocation);
    writer.addMutation(bogusLocation);
    writer.close();
    // Wait for the master to repair the store.
    waitForCleanStore(store);

    // Repeat with the metadata table's own extent, then wait on the root tablet store.
    writer = conn.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig());
    bogusLocation = new Mutation(new KeyExtent(MetadataTable.ID, null, null).getMetadataEntry());
    moved.current.putLocation(bogusLocation);
    writer.addMutation(bogusLocation);
    writer.close();
    waitForCleanStore(new RootTabletStateStore(context, null));
}
Also used : Connector(org.apache.accumulo.core.client.Connector) ClientContext(org.apache.accumulo.core.client.impl.ClientContext) Text(org.apache.hadoop.io.Text) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) TServerInstance(org.apache.accumulo.server.master.state.TServerInstance) MetaDataStateStore(org.apache.accumulo.server.master.state.MetaDataStateStore) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) TreeSet(java.util.TreeSet) RootTabletStateStore(org.apache.accumulo.server.master.state.RootTabletStateStore) TabletLocationState(org.apache.accumulo.server.master.state.TabletLocationState) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) Credentials(org.apache.accumulo.core.client.impl.Credentials) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 38 with TServerInstance

use of org.apache.accumulo.server.master.state.TServerInstance in project accumulo by apache.

the class HostRegexTableLoadBalancer method balance.

/**
 * Balances tablets per table within the group of tablet servers selected for that table's pool.
 *
 * <p>
 * The pass has three phases:
 * <ol>
 * <li>If more than {@code oobCheckMillis} have elapsed since the last check, look for tablets
 * hosted on a server that is not in the table's pool and request a migration to a randomly chosen
 * server of that pool. If any such migration was requested, regular balancing is skipped for
 * this pass.</li>
 * <li>If migrations are outstanding, either skip balancing (when there are at least
 * {@code maxOutstandingMigrations}) or adjust a copy of the current status by the migrations
 * issued in the previous pass.</li>
 * <li>Delegate to each table's balancer, collecting its migrations until
 * {@code maxTServerMigrations} is reached.</li>
 * </ol>
 *
 * @param current
 *          status of the tablet servers to balance over
 * @param migrations
 *          tablets that already have a migration in flight; may be {@code null}
 * @param migrationsOut
 *          receives the migrations requested by this pass
 * @return the delay before the next balance pass in milliseconds; every path returns 20 seconds
 */
@Override
public long balance(SortedMap<TServerInstance, TabletServerStatus> current, Set<KeyExtent> migrations, List<TabletMigration> migrationsOut) {
    long minBalanceTime = 20 * 1000;
    // Iterate over the tables and balance each of them
    TableOperations t = getTableOperations();
    if (t == null)
        return minBalanceTime;
    Map<String, String> tableIdMap = t.tableIdMap();
    long now = System.currentTimeMillis();
    Map<String, SortedMap<TServerInstance, TabletServerStatus>> currentGrouped = splitCurrentByRegex(current);
    if ((now - this.lastOOBCheck) > this.oobCheckMillis) {
        try {
            // One generator serves the whole out-of-bounds check.
            Random random = new Random();
            // Check to see if a tablet is assigned outside the bounds of the pool. If so, migrate it.
            for (String table : t.list()) {
                LOG.debug("Checking for out of bounds tablets for table {}", table);
                String tablePoolName = getPoolNameForTable(table);
                for (Entry<TServerInstance, TabletServerStatus> e : current.entrySet()) {
                    // pool names are the same as table names, except in the DEFAULT case.
                    // If this table is assigned to a pool for this host, then move on.
                    List<String> hostPools = getPoolNamesForHost(e.getKey().host());
                    if (hostPools.contains(tablePoolName)) {
                        continue;
                    }
                    String tid = tableIdMap.get(table);
                    if (null == tid) {
                        LOG.warn("Unable to check for out of bounds tablets for table {}, it may have been deleted or renamed.", table);
                        continue;
                    }
                    try {
                        List<TabletStats> outOfBoundsTablets = getOnlineTabletsForTable(e.getKey(), Table.ID.of(tid));
                        if (null == outOfBoundsTablets) {
                            continue;
                        }
                        for (TabletStats ts : outOfBoundsTablets) {
                            KeyExtent ke = new KeyExtent(ts.getExtent());
                            // migrations is treated as nullable further down, so guard it here as well.
                            if (migrations != null && migrations.contains(ke)) {
                                LOG.debug("Migration for out of bounds tablet {} has already been requested", ke);
                                continue;
                            }
                            SortedMap<TServerInstance, TabletServerStatus> currentView = currentGrouped.get(tablePoolName);
                            if (null != currentView) {
                                // Pick a uniformly random server of the pool as the destination.
                                int skip = random.nextInt(currentView.size());
                                Iterator<TServerInstance> iter = currentView.keySet().iterator();
                                for (int i = 0; i < skip; i++) {
                                    iter.next();
                                }
                                TServerInstance nextTS = iter.next();
                                LOG.info("Tablet {} is currently outside the bounds of the regex, migrating from {} to {}", ke, e.getKey(), nextTS);
                                migrationsOut.add(new TabletMigration(ke, e.getKey(), nextTS));
                                // NOTE(review): this only leaves the per-tablet loop; the enclosing server
                                // and table loops keep going.
                                if (migrationsOut.size() >= this.maxTServerMigrations) {
                                    break;
                                }
                            } else {
                                LOG.warn("No tablet servers online for pool {}, unable to migrate out of bounds tablets", tablePoolName);
                            }
                        }
                    } catch (TException e1) {
                        // Pass the caught exception (not the map entry) as the trailing argument so the
                        // logger records its stack trace.
                        LOG.error("Error in OOB check getting tablets for table {} from server {}", tid, e.getKey().host(), e1);
                    }
                }
            }
        } finally {
            // this could have taken a while...get a new time
            this.lastOOBCheck = System.currentTimeMillis();
        }
    }
    if (migrationsOut.size() > 0) {
        LOG.warn("Not balancing tables due to moving {} out of bounds tablets", migrationsOut.size());
        LOG.info("Migrating out of bounds tablets: {}", migrationsOut);
        return minBalanceTime;
    }
    if (migrations != null && migrations.size() > 0) {
        if (migrations.size() >= maxOutstandingMigrations) {
            LOG.warn("Not balancing tables due to {} outstanding migrations", migrations.size());
            if (LOG.isTraceEnabled()) {
                LOG.trace("Sample up to 10 outstanding migrations: {}", Iterables.limit(migrations, 10));
            }
            return minBalanceTime;
        }
        LOG.debug("Current outstanding migrations of {} being applied", migrations.size());
        if (LOG.isTraceEnabled()) {
            LOG.trace("Sample up to 10 outstanding migrations: {}", Iterables.limit(migrations, 10));
        }
        // Forget migrations from the previous pass that are no longer outstanding.
        migrationsFromLastPass.keySet().retainAll(migrations);
        SortedMap<TServerInstance, TabletServerStatus> currentCopy = new TreeMap<>(current);
        Multimap<TServerInstance, String> serverTableIdCopied = HashMultimap.create();
        // Adjust the online tablet counts as if the remaining migrations had already completed.
        for (TabletMigration migration : migrationsFromLastPass.values()) {
            TableInfo fromInfo = getTableInfo(currentCopy, serverTableIdCopied, migration.tablet.getTableId().toString(), migration.oldServer);
            if (fromInfo != null) {
                fromInfo.setOnlineTablets(fromInfo.getOnlineTablets() - 1);
            }
            TableInfo toInfo = getTableInfo(currentCopy, serverTableIdCopied, migration.tablet.getTableId().toString(), migration.newServer);
            if (toInfo != null) {
                toInfo.setOnlineTablets(toInfo.getOnlineTablets() + 1);
            }
        }
        migrations = EMPTY_MIGRATIONS;
    } else {
        migrationsFromLastPass.clear();
    }
    for (String s : tableIdMap.values()) {
        Table.ID tableId = Table.ID.of(s);
        String tableName = tableIdToTableName.get(tableId);
        String regexTableName = getPoolNameForTable(tableName);
        SortedMap<TServerInstance, TabletServerStatus> currentView = currentGrouped.get(regexTableName);
        if (null == currentView) {
            LOG.warn("Skipping balance for table {} as no tablet servers are online.", tableName);
            continue;
        }
        ArrayList<TabletMigration> newMigrations = new ArrayList<>();
        getBalancerForTable(tableId).balance(currentView, migrations, newMigrations);
        // Track how long a table has produced migrations on every pass and warn after an hour.
        if (newMigrations.isEmpty()) {
            tableToTimeSinceNoMigrations.remove(s);
        } else if (tableToTimeSinceNoMigrations.containsKey(s)) {
            if ((now - tableToTimeSinceNoMigrations.get(s)) > ONE_HOUR) {
                LOG.warn("We have been consistently producing migrations for {}: {}", tableName, Iterables.limit(newMigrations, 10));
            }
        } else {
            tableToTimeSinceNoMigrations.put(s, now);
        }
        migrationsOut.addAll(newMigrations);
        if (migrationsOut.size() >= this.maxTServerMigrations) {
            break;
        }
    }
    for (TabletMigration migration : migrationsOut) {
        migrationsFromLastPass.put(migration.tablet, migration);
    }
    LOG.info("Migrating tablets for balance: {}", migrationsOut);
    return minBalanceTime;
}
Also used : TException(org.apache.thrift.TException) ArrayList(java.util.ArrayList) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) TableOperations(org.apache.accumulo.core.client.admin.TableOperations) Random(java.util.Random) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus) Table(org.apache.accumulo.core.client.impl.Table) TabletMigration(org.apache.accumulo.server.master.state.TabletMigration) TabletStats(org.apache.accumulo.core.tabletserver.thrift.TabletStats) TreeMap(java.util.TreeMap) TServerInstance(org.apache.accumulo.server.master.state.TServerInstance) SortedMap(java.util.SortedMap)

Example 39 with TServerInstance

use of org.apache.accumulo.server.master.state.TServerInstance in project accumulo by apache.

the class Master method gatherTableInformation.

/**
 * Asks every server in {@code currentServers} for its status, in parallel, and returns the
 * statuses that were obtained.
 *
 * <p>
 * Requests run on a fixed-size pool sized by {@code MASTER_STATUS_THREAD_POOL_SIZE} (at least one
 * thread). The method waits up to twice {@code TSERV_CLIENT_TIMEOUT} for them to finish. A server
 * whose request fails has its counter in {@code badServers} incremented; once the counter
 * exceeds {@code MAX_BAD_STATUS_COUNT} the server is asked to halt and its counter is removed.
 * Counters for servers that are no longer current, or that answered in this round, are dropped.
 *
 * @param currentServers
 *          the servers to query
 * @return a snapshot, taken after the wait, of the statuses received so far; results arriving
 *         later are not reflected in it
 */
private SortedMap<TServerInstance, TabletServerStatus> gatherTableInformation(Set<TServerInstance> currentServers) {
    long start = System.currentTimeMillis();
    int threads = Math.max(getConfiguration().getCount(Property.MASTER_STATUS_THREAD_POOL_SIZE), 1);
    ExecutorService tp = Executors.newFixedThreadPool(threads);
    // Worker threads write concurrently, so the collecting map must be synchronized; a plain
    // TreeMap is not safe for concurrent puts.
    final SortedMap<TServerInstance, TabletServerStatus> collected = java.util.Collections.synchronizedSortedMap(new TreeMap<TServerInstance, TabletServerStatus>());
    for (TServerInstance serverInstance : currentServers) {
        final TServerInstance server = serverInstance;
        tp.submit(new Runnable() {

            @Override
            public void run() {
                try {
                    Thread t = Thread.currentThread();
                    String oldName = t.getName();
                    try {
                        // Rename the pool thread while it is busy so thread dumps show which server it waits on.
                        t.setName("Getting status from " + server);
                        TServerConnection connection = tserverSet.getConnection(server);
                        if (connection == null)
                            throw new IOException("No connection to " + server);
                        TabletServerStatus status = connection.getTableMap(false);
                        collected.put(server, status);
                    } finally {
                        t.setName(oldName);
                    }
                } catch (Exception ex) {
                    log.error("unable to get tablet server status {} {}", server, ex.toString());
                    log.debug("unable to get tablet server status {}", server, ex);
                    if (badServers.get(server).incrementAndGet() > MAX_BAD_STATUS_COUNT) {
                        log.warn("attempting to stop {}", server);
                        try {
                            TServerConnection connection = tserverSet.getConnection(server);
                            if (connection != null) {
                                connection.halt(masterLock);
                            }
                        } catch (TTransportException e) {
                        // ignore: it's probably down
                        } catch (Exception e) {
                            log.info("error talking to troublesome tablet server", e);
                        }
                        badServers.remove(server);
                    }
                }
            }
        });
    }
    tp.shutdown();
    try {
        tp.awaitTermination(getConfiguration().getTimeInMillis(Property.TSERV_CLIENT_TIMEOUT) * 2, TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
        log.debug("Interrupted while fetching status");
        // Restore the interrupt flag so the caller can still observe the interruption.
        Thread.currentThread().interrupt();
    }
    // Workers may still be running if the wait timed out or was interrupted. Copy under the map's
    // own lock so the returned map is stable and can be iterated without synchronization.
    final SortedMap<TServerInstance, TabletServerStatus> result;
    synchronized (collected) {
        result = new TreeMap<TServerInstance, TabletServerStatus>(collected);
    }
    synchronized (badServers) {
        badServers.keySet().retainAll(currentServers);
        badServers.keySet().removeAll(result.keySet());
    }
    log.debug(String.format("Finished gathering information from %d servers in %.2f seconds", result.size(), (System.currentTimeMillis() - start) / 1000.));
    return result;
}
Also used : TTransportException(org.apache.thrift.transport.TTransportException) IOException(java.io.IOException) TreeMap(java.util.TreeMap) TServerInstance(org.apache.accumulo.server.master.state.TServerInstance) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) NoAuthException(org.apache.zookeeper.KeeperException.NoAuthException) WalMarkerException(org.apache.accumulo.server.log.WalStateManager.WalMarkerException) TException(org.apache.thrift.TException) IOException(java.io.IOException) ThriftTableOperationException(org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException) TTransportException(org.apache.thrift.transport.TTransportException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) KeeperException(org.apache.zookeeper.KeeperException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) TServerConnection(org.apache.accumulo.server.master.LiveTServerSet.TServerConnection) ExecutorService(java.util.concurrent.ExecutorService) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus)

Example 40 with TServerInstance

use of org.apache.accumulo.server.master.state.TServerInstance in project accumulo by apache.

the class Master method getGoalState.

/**
 * Determines the goal state for a tablet by consulting, in order: the system-wide goal, the set
 * of servers being shut down, an overlapping merge, the table's goal, and pending migrations.
 *
 * @param tls
 *          the tablet's current location state
 * @param mergeInfo
 *          merge information to test the tablet's extent against
 * @return the goal state for the tablet
 */
TabletGoalState getGoalState(TabletLocationState tls, MergeInfo mergeInfo) {
    final KeyExtent extent = tls.extent;

    // A system goal other than HOSTED overrides everything else.
    final TabletGoalState systemGoal = getSystemGoalState(tls);
    if (systemGoal != TabletGoalState.HOSTED) {
        return systemGoal;
    }

    // Tablets on a server that is shutting down get suspended.
    if (tls.current != null && serversToShutdown.contains(tls.current)) {
        return TabletGoalState.SUSPENDED;
    }

    // A merge overlapping this tablet may dictate the goal, depending on the merge's phase.
    if (mergeInfo.getExtent() != null) {
        log.debug("mergeInfo overlaps: {} {}", extent, mergeInfo.overlaps(extent));
        if (mergeInfo.overlaps(extent)) {
            switch(mergeInfo.getState()) {
                case STARTED:
                case SPLITTING:
                    return TabletGoalState.HOSTED;
                case WAITING_FOR_CHOPPED: {
                    // A chopped tablet is unassigned right away when hosted; otherwise only once it
                    // has no write-ahead logs left.
                    final boolean hosted = tls.getState(tserverSet.getCurrentServers()).equals(TabletState.HOSTED);
                    final boolean readyToUnassign = tls.chopped && (hosted || tls.walogs.isEmpty());
                    return readyToUnassign ? TabletGoalState.UNASSIGNED : TabletGoalState.HOSTED;
                }
                case WAITING_FOR_OFFLINE:
                case MERGING:
                    return TabletGoalState.UNASSIGNED;
                case NONE:
                case COMPLETE:
                    // The merge imposes nothing; fall through to the table-level checks.
                    break;
            }
        }
    }

    // The table's own goal (e.g. being taken offline) comes next.
    final TabletGoalState tableGoal = getTableGoalState(extent);
    if (tableGoal != TabletGoalState.HOSTED) {
        return tableGoal;
    }

    // A tablet with a pending migration to a different server must be unassigned first.
    final TServerInstance dest = migrations.get(extent);
    final boolean mustMove = dest != null && tls.current != null && !dest.equals(tls.current);
    return mustMove ? TabletGoalState.UNASSIGNED : tableGoal;
}
Also used : KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) TServerInstance(org.apache.accumulo.server.master.state.TServerInstance)

Aggregations

TServerInstance (org.apache.accumulo.server.master.state.TServerInstance)67 KeyExtent (org.apache.accumulo.core.data.impl.KeyExtent)35 HashMap (java.util.HashMap)22 Test (org.junit.Test)22 ArrayList (java.util.ArrayList)21 TabletServerStatus (org.apache.accumulo.core.master.thrift.TabletServerStatus)14 HashSet (java.util.HashSet)10 Value (org.apache.accumulo.core.data.Value)10 AccumuloServerContext (org.apache.accumulo.server.AccumuloServerContext)10 TServerConnection (org.apache.accumulo.server.master.LiveTServerSet.TServerConnection)9 TabletMigration (org.apache.accumulo.server.master.state.TabletMigration)9 TreeMap (java.util.TreeMap)8 Instance (org.apache.accumulo.core.client.Instance)8 Table (org.apache.accumulo.core.client.impl.Table)8 TKeyExtent (org.apache.accumulo.core.data.thrift.TKeyExtent)8 UUID (java.util.UUID)7 Key (org.apache.accumulo.core.data.Key)7 Text (org.apache.hadoop.io.Text)7 TException (org.apache.thrift.TException)7 List (java.util.List)6