Search in sources :

Example 1 with MasterMonitorInfo

use of org.apache.accumulo.core.master.thrift.MasterMonitorInfo in project accumulo by apache.

the class BalanceAfterCommsFailureIT method checkBalance.

/**
 * Polls the master for its monitor stats until no tablets are reported as unassigned (at most
 * ten polls, three seconds apart), then asserts that the online tablets are spread evenly
 * across the tablet servers.
 *
 * <p>
 * "Evenly" here means that every server's online tablet count differs from the first server's
 * count by no more than the number of servers.
 *
 * @param c
 *          connector whose instance is used to build the client context
 * @throws Exception
 *           if the master cannot be queried or the polling sleep is interrupted
 */
private void checkBalance(Connector c) throws Exception {
    final Credentials rootCreds = new Credentials("root", new PasswordToken(ROOT_PASSWORD));
    final ClientContext context = new ClientContext(c.getInstance(), rootCreds, getClientConfig());

    MasterMonitorInfo stats = null;
    // Start non-zero so that the master is polled at least once.
    int unassignedTablets = 1;
    int attempt = 0;
    while (unassignedTablets > 0 && attempt < 10) {
        MasterClientService.Iface client = null;
        boolean fetched = false;
        while (!fetched) {
            try {
                client = MasterClient.getConnectionWithRetry(context);
                stats = client.getMasterStats(Tracer.traceInfo(), context.rpcCreds());
                fetched = true;
            } catch (ThriftNotActiveServiceException e) {
                // Not the active master: pause briefly and go around again for a new connection
                log.debug("Contacted a Master which is no longer active, retrying");
                sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
            } finally {
                // Runs on success, on retry and on any other exception
                if (client != null) {
                    MasterClient.close(client);
                }
            }
        }

        unassignedTablets = stats.getUnassignedTablets();
        if (unassignedTablets > 0) {
            log.info("Found {} unassigned tablets, sleeping 3 seconds for tablet assignment", unassignedTablets);
            Thread.sleep(3000);
        }
        attempt++;
    }
    assertEquals("Unassigned tablets were not assigned within 30 seconds", 0, unassignedTablets);

    // Total the online tablets hosted by each tablet server, over all of its tables.
    final List<Integer> counts = new ArrayList<>();
    for (TabletServerStatus server : stats.tServerInfo) {
        int onlineOnServer = 0;
        for (TableInfo table : server.tableMap.values()) {
            onlineOnServer += table.onlineTablets;
        }
        counts.add(onlineOnServer);
    }
    assertTrue("Expected to have at least two TabletServers", counts.size() > 1);

    // Every other server is compared against the first one.
    final int serverCount = counts.size();
    final int baseline = counts.get(0);
    for (int idx = 1; idx < serverCount; idx++) {
        final int diff = Math.abs(baseline - counts.get(idx));
        assertTrue("Expected difference in tablets to be less than or equal to " + serverCount + " but was " + diff + ". Counts " + counts, diff <= serverCount);
    }
}
Also used : MasterMonitorInfo(org.apache.accumulo.core.master.thrift.MasterMonitorInfo) ThriftNotActiveServiceException(org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException) ClientContext(org.apache.accumulo.core.client.impl.ClientContext) ArrayList(java.util.ArrayList) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) MasterClientService(org.apache.accumulo.core.master.thrift.MasterClientService) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) Credentials(org.apache.accumulo.core.client.impl.Credentials) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus)

Example 2 with MasterMonitorInfo

use of org.apache.accumulo.core.master.thrift.MasterMonitorInfo in project accumulo by apache.

the class BalanceInPresenceOfOfflineTableIT method test.

/**
 * Ingests and verifies 200000 rows in {@code TEST_TABLE}, then repeatedly polls the master's
 * monitor stats until the tablets look balanced across tablet servers or roughly five minutes
 * have elapsed.
 *
 * <p>
 * A poll counts as "balanced" only when at least two tablet servers are reported, no tablets are
 * unassigned, the first server hosts more than 10 tablets, and the least loaded server hosts at
 * least half as many tablets as the most loaded one. The wait between polls starts at ten
 * seconds and doubles each round.
 *
 * @throws Exception
 *           if ingest, verification or talking to the master fails
 */
@Test
public void test() throws Exception {
    log.info("Test that balancing is not stopped by an offline table with outstanding migrations.");
    log.debug("starting test ingestion");

    TestIngest.Opts ingestOpts = new TestIngest.Opts();
    VerifyIngest.Opts verifyOpts = new VerifyIngest.Opts();
    ClientConfiguration conf = cluster.getClientConfig();
    if (!conf.hasSasl()) {
        ingestOpts.setPrincipal("root");
        verifyOpts.setPrincipal("root");
    } else {
        ingestOpts.updateKerberosCredentials(cluster.getClientConfig());
        verifyOpts.updateKerberosCredentials(cluster.getClientConfig());
    }
    ingestOpts.rows = 200000;
    verifyOpts.rows = ingestOpts.rows;

    ingestOpts.setTableName(TEST_TABLE);
    TestIngest.ingest(connector, ingestOpts, new BatchWriterOpts());
    connector.tableOperations().flush(TEST_TABLE, null, null, true);
    verifyOpts.setTableName(TEST_TABLE);
    VerifyIngest.verifyIngest(connector, verifyOpts, new ScannerOpts());

    log.debug("waiting for balancing, up to ~5 minutes to allow for migration cleanup.");
    final long startTime = System.currentTimeMillis();
    // Five minutes plus fifteen seconds, in milliseconds.
    final long maxWaitMillis = (5 * 60 + 15) * 1000;
    long currentWait = 10 * 1000;
    boolean balancingWorked = false;
    Credentials creds = new Credentials(getAdminPrincipal(), getAdminToken());

    while (!balancingWorked && (System.currentTimeMillis() - startTime) < maxWaitMillis) {
        Thread.sleep(currentWait);
        // Doubled up front, so the "sleeping for" messages below report the next wait.
        currentWait *= 2;

        log.debug("fetch the list of tablets assigned to each tserver.");
        MasterClientService.Iface client = null;
        MasterMonitorInfo stats = null;
        Instance instance = new ZooKeeperInstance(cluster.getClientConfig());
        boolean gotStats = false;
        while (!gotStats) {
            try {
                client = MasterClient.getConnectionWithRetry(new ClientContext(instance, creds, cluster.getClientConfig()));
                stats = client.getMasterStats(Tracer.traceInfo(), creds.toThrift(instance));
                gotStats = true;
            } catch (ThriftSecurityException exception) {
                throw new AccumuloSecurityException(exception);
            } catch (ThriftNotActiveServiceException e) {
                // Not the active master: pause briefly and go around again for a new connection
                log.debug("Contacted a Master which is no longer active, retrying");
                sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
            } catch (TException exception) {
                throw new AccumuloException(exception);
            } finally {
                if (client != null) {
                    MasterClient.close(client);
                }
            }
        }

        final int serverCount = stats.getTServerInfoSize();
        if (serverCount < 2) {
            log.debug("we need >= 2 servers. sleeping for {}ms", currentWait);
            continue;
        }
        if (stats.getUnassignedTablets() != 0) {
            log.debug("We shouldn't have unassigned tablets. sleeping for {}ms", currentWait);
            continue;
        }

        // Sum the tablet counts of every table hosted on each server.
        long[] tabletsPerServer = new long[stats.getTServerInfoSize()];
        Arrays.fill(tabletsPerServer, 0L);
        for (int server = 0; server < stats.getTServerInfoSize(); server++) {
            for (TableInfo table : stats.getTServerInfo().get(server).getTableMap().values()) {
                tabletsPerServer[server] += table.getTablets();
            }
        }
        if (tabletsPerServer[0] <= 10) {
            log.debug("We should have > 10 tablets. sleeping for {}ms", currentWait);
            continue;
        }

        long min = NumberUtils.min(tabletsPerServer);
        long max = NumberUtils.max(tabletsPerServer);
        log.debug("Min={}, Max={}", min, max);
        final double minToMaxRatio = min / ((double) max);
        if (minToMaxRatio < 0.5) {
            log.debug("ratio of min to max tablets per server should be roughly even. sleeping for {}ms", currentWait);
            continue;
        }
        balancingWorked = true;
    }
    Assert.assertTrue("did not properly balance", balancingWorked);
}
Also used : TException(org.apache.thrift.TException) MasterMonitorInfo(org.apache.accumulo.core.master.thrift.MasterMonitorInfo) ThriftNotActiveServiceException(org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException) Instance(org.apache.accumulo.core.client.Instance) ZooKeeperInstance(org.apache.accumulo.core.client.ZooKeeperInstance) ZooKeeperInstance(org.apache.accumulo.core.client.ZooKeeperInstance) TestIngest(org.apache.accumulo.test.TestIngest) VerifyIngest(org.apache.accumulo.test.VerifyIngest) BatchWriterOpts(org.apache.accumulo.core.cli.BatchWriterOpts) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) AccumuloException(org.apache.accumulo.core.client.AccumuloException) ScannerOpts(org.apache.accumulo.core.cli.ScannerOpts) BatchWriterOpts(org.apache.accumulo.core.cli.BatchWriterOpts) ClientContext(org.apache.accumulo.core.client.impl.ClientContext) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) ScannerOpts(org.apache.accumulo.core.cli.ScannerOpts) MasterClientService(org.apache.accumulo.core.master.thrift.MasterClientService) Map(java.util.Map) ClientConfiguration(org.apache.accumulo.core.client.ClientConfiguration) Credentials(org.apache.accumulo.core.client.impl.Credentials) Test(org.junit.Test)

Example 3 with MasterMonitorInfo

use of org.apache.accumulo.core.master.thrift.MasterMonitorInfo in project accumulo by apache.

the class BulkImportMonitoringIT method test.

/**
 * Starts the monitor, creates a pre-split table, writes ten directories of ten bulk files each,
 * imports them concurrently, and checks that the master's monitor info reports bulk imports
 * while the imports are running.
 *
 * @throws Exception
 *           if table setup, file generation or any of the import tasks fails
 */
@Test
public void test() throws Exception {
    getCluster().getClusterControl().start(ServerType.MONITOR);
    final Connector c = getConnector();
    final String tableName = getUniqueNames(1)[0];
    c.tableOperations().create(tableName);
    c.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "1");
    // splits to slow down bulk import
    SortedSet<Text> splits = new TreeSet<>();
    for (int i = 1; i < 0xf; i++) {
        splits.add(new Text(Integer.toHexString(i)));
    }
    c.tableOperations().addSplits(tableName, splits);

    // Baseline: a single tserver and no bulk imports reported anywhere yet.
    MasterMonitorInfo stats = getCluster().getMasterMonitorInfo();
    assertEquals(1, stats.tServerInfo.size());
    assertEquals(0, stats.bulkImports.size());
    assertEquals(0, stats.tServerInfo.get(0).bulkImports.size());

    log.info("Creating lots of bulk import files");
    final FileSystem fs = getCluster().getFileSystem();
    final Path basePath = getCluster().getTemporaryPath();
    CachedConfiguration.setInstance(fs.getConf());
    final Path base = new Path(basePath, "testBulkLoad" + tableName);
    fs.delete(base, true);
    fs.mkdirs(base);

    ExecutorService es = Executors.newFixedThreadPool(5);
    List<Future<Pair<String, String>>> futures = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final int which = i;
        futures.add(es.submit(new Callable<Pair<String, String>>() {

            @Override
            public Pair<String, String> call() throws Exception {
                Path bulkFailures = new Path(base, "failures" + which);
                Path files = new Path(base, "files" + which);
                fs.mkdirs(bulkFailures);
                fs.mkdirs(files);
                for (int fileIndex = 0; fileIndex < 10; fileIndex++) {
                    FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder().forFile(files.toString() + "/bulk_" + fileIndex + "." + RFile.EXTENSION, fs, fs.getConf()).withTableConfiguration(DefaultConfiguration.getInstance()).build();
                    try {
                        writer.startDefaultLocalityGroup();
                        for (int j = 0x100; j < 0xfff; j += 3) {
                            writer.append(new Key(Integer.toHexString(j)), new Value(new byte[0]));
                        }
                    } finally {
                        // Close even when a write fails so the underlying file handle is not leaked.
                        writer.close();
                    }
                }
                return new Pair<>(files.toString(), bulkFailures.toString());
            }
        }));
    }
    // Wait for every directory to be written; get() rethrows any failure from a writer task.
    List<Pair<String, String>> dirs = new ArrayList<>();
    for (Future<Pair<String, String>> f : futures) {
        dirs.add(f.get());
    }

    log.info("Importing");
    long now = System.currentTimeMillis();
    List<Future<Object>> errs = new ArrayList<>();
    for (Pair<String, String> entry : dirs) {
        final String dir = entry.getFirst();
        final String err = entry.getSecond();
        errs.add(es.submit(new Callable<Object>() {

            @Override
            public Object call() throws Exception {
                c.tableOperations().importDirectory(tableName, dir, err, false);
                return null;
            }
        }));
    }
    // No further tasks are submitted; the already queued imports keep running.
    es.shutdown();

    // Poll the master until a bulk import shows up in its stats or all imports have finished.
    // NOTE(review): the loop also stops when only the tserver reports a bulk import, while the
    // assertion below checks the master-level list only - confirm this cannot cause a spurious
    // failure.
    while (!es.isTerminated() && stats.bulkImports.size() + stats.tServerInfo.get(0).bulkImports.size() == 0) {
        es.awaitTermination(10, TimeUnit.MILLISECONDS);
        stats = getCluster().getMasterMonitorInfo();
    }
    log.info(stats.bulkImports.toString());
    assertTrue(stats.bulkImports.size() > 0);
    // look for exception
    for (Future<Object> err : errs) {
        err.get();
    }
    es.awaitTermination(2, TimeUnit.MINUTES);
    assertTrue(es.isTerminated());
    log.info(String.format("Completed in %.2f seconds", (System.currentTimeMillis() - now) / 1000.));
}
Also used : Connector(org.apache.accumulo.core.client.Connector) MasterMonitorInfo(org.apache.accumulo.core.master.thrift.MasterMonitorInfo) ArrayList(java.util.ArrayList) Callable(java.util.concurrent.Callable) TreeSet(java.util.TreeSet) FileSystem(org.apache.hadoop.fs.FileSystem) Pair(org.apache.accumulo.core.util.Pair) Path(org.apache.hadoop.fs.Path) FileSKVWriter(org.apache.accumulo.core.file.FileSKVWriter) Text(org.apache.hadoop.io.Text) ExecutorService(java.util.concurrent.ExecutorService) Value(org.apache.accumulo.core.data.Value) Future(java.util.concurrent.Future) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 4 with MasterMonitorInfo

use of org.apache.accumulo.core.master.thrift.MasterMonitorInfo in project accumulo by apache.

the class MiniAccumuloClusterImplTest method saneMonitorInfo.

/**
 * Polls the mini cluster's master monitor info until it reports more than two tables and
 * exactly {@code NUM_TSERVERS} tablet servers, then sanity-checks the reported master state,
 * goal state, table map and tablet server list.
 *
 * <p>
 * The polling loop has no exit of its own; the test relies on the JUnit timeout to fail if the
 * cluster never reaches the expected shape.
 *
 * @throws Exception
 *           if the monitor info cannot be fetched or the polling sleep is interrupted
 */
@Test(timeout = 60000)
public void saneMonitorInfo() throws Exception {
    MasterMonitorInfo stats;
    while (true) {
        stats = accumulo.getMasterMonitorInfo();
        // Null-safe: a not-yet-populated map or list simply means "not ready, poll again".
        final boolean tablesReady = stats.tableMap != null && stats.tableMap.size() > 2;
        final boolean tserversReady = stats.tServerInfo != null && stats.tServerInfo.size() == NUM_TSERVERS;
        if (tablesReady && tserversReady) {
            break;
        }
        // Brief pause so the master is not hammered by a tight polling loop.
        Thread.sleep(100);
    }
    List<MasterState> validStates = Arrays.asList(MasterState.values());
    List<MasterGoalState> validGoals = Arrays.asList(MasterGoalState.values());
    Assert.assertTrue("master state should be valid.", validStates.contains(stats.state));
    Assert.assertTrue("master goal state should be in " + validGoals + ". is " + stats.goalState, validGoals.contains(stats.goalState));
    Assert.assertNotNull("should have a table map.", stats.tableMap);
    Assert.assertTrue("root table should exist in " + stats.tableMap.keySet(), stats.tableMap.containsKey(RootTable.ID.canonicalID()));
    Assert.assertTrue("meta table should exist in " + stats.tableMap.keySet(), stats.tableMap.containsKey(MetadataTable.ID.canonicalID()));
    Assert.assertTrue("our test table should exist in " + stats.tableMap.keySet(), stats.tableMap.containsKey(testTableID));
    Assert.assertNotNull("there should be tservers.", stats.tServerInfo);
    Assert.assertEquals(NUM_TSERVERS, stats.tServerInfo.size());
}
Also used : MasterMonitorInfo(org.apache.accumulo.core.master.thrift.MasterMonitorInfo) MasterGoalState(org.apache.accumulo.core.master.thrift.MasterGoalState) MasterState(org.apache.accumulo.core.master.thrift.MasterState) Test(org.junit.Test)

Example 5 with MasterMonitorInfo

use of org.apache.accumulo.core.master.thrift.MasterMonitorInfo in project accumulo by apache.

the class Master method getMasterMonitorInfo.

/**
 * Assembles a fresh {@link MasterMonitorInfo} from this master's current view: per-tserver
 * status, per-table totals, bad and shutting-down servers, master state and goal state, the
 * unassigned tablet count, the dead tablet server list and the bulk import status.
 *
 * @return a newly created monitor info object
 */
public MasterMonitorInfo getMasterMonitorInfo() {
    final MasterMonitorInfo info = new MasterMonitorInfo();

    // Record each tserver's status and fold its per-table figures into the overall table map.
    info.tServerInfo = new ArrayList<>();
    // presumably DefaultMap hands out an entry for keys it has not seen yet - confirm
    info.tableMap = new DefaultMap<>(new TableInfo());
    for (Entry<TServerInstance, TabletServerStatus> tserver : tserverStatus.entrySet()) {
        final TabletServerStatus tserverStats = tserver.getValue();
        info.tServerInfo.add(tserverStats);
        for (Entry<String, TableInfo> perTable : tserverStats.tableMap.entrySet()) {
            final TableInfo runningTotal = info.tableMap.get(perTable.getKey());
            TableInfoUtil.add(runningTotal, perTable.getValue());
        }
    }

    // Every server currently tracked as bad is reported as UNRESPONSIVE.
    info.badTServers = new HashMap<>();
    synchronized (badServers) {
        for (TServerInstance unresponsive : badServers.keySet()) {
            info.badTServers.put(unresponsive.hostPort(), TabletServerState.UNRESPONSIVE.getId());
        }
    }

    info.state = getMasterState();
    info.goalState = getMasterGoalState();
    info.unassignedTablets = displayUnassigned();

    info.serversShuttingDown = new HashSet<>();
    synchronized (serversToShutdown) {
        for (TServerInstance stopping : serversToShutdown) {
            info.serversShuttingDown.add(stopping.hostPort());
        }
    }

    // Dead tablet servers are read through a DeadServerList rooted at this instance's ZooKeeper path.
    final DeadServerList deadServers = new DeadServerList(ZooUtil.getRoot(getInstance()) + Constants.ZDEADTSERVERS);
    info.deadTabletServers = deadServers.getList();

    info.bulkImports = bulkImportStatus.getBulkLoadStatus();
    return info;
}
Also used : MasterMonitorInfo(org.apache.accumulo.core.master.thrift.MasterMonitorInfo) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) DeadServerList(org.apache.accumulo.server.master.state.DeadServerList) TServerInstance(org.apache.accumulo.server.master.state.TServerInstance) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus)

Aggregations

- MasterMonitorInfo (org.apache.accumulo.core.master.thrift.MasterMonitorInfo): 19
- TabletServerStatus (org.apache.accumulo.core.master.thrift.TabletServerStatus): 10
- Test (org.junit.Test): 8
- ThriftNotActiveServiceException (org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException): 7
- TableInfo (org.apache.accumulo.core.master.thrift.TableInfo): 7
- Connector (org.apache.accumulo.core.client.Connector): 6
- MasterClientService (org.apache.accumulo.core.master.thrift.MasterClientService): 6
- ArrayList (java.util.ArrayList): 5
- ClientContext (org.apache.accumulo.core.client.impl.ClientContext): 5
- Credentials (org.apache.accumulo.core.client.impl.Credentials): 5
- DeadServer (org.apache.accumulo.core.master.thrift.DeadServer): 4
- TreeSet (java.util.TreeSet): 3
- GET (javax.ws.rs.GET): 3
- WebApplicationException (javax.ws.rs.WebApplicationException): 3
- BatchWriterOpts (org.apache.accumulo.core.cli.BatchWriterOpts): 3
- Instance (org.apache.accumulo.core.client.Instance): 3
- PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken): 3
- Text (org.apache.hadoop.io.Text): 3
- Callable (java.util.concurrent.Callable): 2
- ExecutorService (java.util.concurrent.ExecutorService): 2