Search in sources :

Example 1 with ThriftNotActiveServiceException

use of org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException in project accumulo by apache.

the class BalanceAfterCommsFailureIT method checkBalance.

private void checkBalance(Connector c) throws Exception {
    Credentials creds = new Credentials("root", new PasswordToken(ROOT_PASSWORD));
    ClientContext context = new ClientContext(c.getInstance(), creds, getClientConfig());
    MasterMonitorInfo stats = null;
    int unassignedTablets = 1;
    for (int i = 0; unassignedTablets > 0 && i < 10; i++) {
        MasterClientService.Iface client = null;
        while (true) {
            try {
                client = MasterClient.getConnectionWithRetry(context);
                stats = client.getMasterStats(Tracer.traceInfo(), context.rpcCreds());
                break;
            } catch (ThriftNotActiveServiceException e) {
                // Let it loop, fetching a new location
                log.debug("Contacted a Master which is no longer active, retrying");
                sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
            } finally {
                if (client != null)
                    MasterClient.close(client);
            }
        }
        unassignedTablets = stats.getUnassignedTablets();
        if (unassignedTablets > 0) {
            log.info("Found {} unassigned tablets, sleeping 3 seconds for tablet assignment", unassignedTablets);
            Thread.sleep(3000);
        }
    }
    assertEquals("Unassigned tablets were not assigned within 30 seconds", 0, unassignedTablets);
    List<Integer> counts = new ArrayList<>();
    for (TabletServerStatus server : stats.tServerInfo) {
        int count = 0;
        for (TableInfo table : server.tableMap.values()) {
            count += table.onlineTablets;
        }
        counts.add(count);
    }
    assertTrue("Expected to have at least two TabletServers", counts.size() > 1);
    for (int i = 1; i < counts.size(); i++) {
        int diff = Math.abs(counts.get(0) - counts.get(i));
        assertTrue("Expected difference in tablets to be less than or equal to " + counts.size() + " but was " + diff + ". Counts " + counts, diff <= counts.size());
    }
}
Also used : MasterMonitorInfo(org.apache.accumulo.core.master.thrift.MasterMonitorInfo) ThriftNotActiveServiceException(org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException) ClientContext(org.apache.accumulo.core.client.impl.ClientContext) ArrayList(java.util.ArrayList) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) MasterClientService(org.apache.accumulo.core.master.thrift.MasterClientService) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) Credentials(org.apache.accumulo.core.client.impl.Credentials) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus)

Example 2 with ThriftNotActiveServiceException

use of org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException in project accumulo by apache.

the class BalanceInPresenceOfOfflineTableIT method test.

@Test
public void test() throws Exception {
    log.info("Test that balancing is not stopped by an offline table with outstanding migrations.");
    log.debug("starting test ingestion");
    TestIngest.Opts opts = new TestIngest.Opts();
    VerifyIngest.Opts vopts = new VerifyIngest.Opts();
    ClientConfiguration conf = cluster.getClientConfig();
    if (conf.hasSasl()) {
        opts.updateKerberosCredentials(cluster.getClientConfig());
        vopts.updateKerberosCredentials(cluster.getClientConfig());
    } else {
        opts.setPrincipal("root");
        vopts.setPrincipal("root");
    }
    vopts.rows = opts.rows = 200000;
    opts.setTableName(TEST_TABLE);
    TestIngest.ingest(connector, opts, new BatchWriterOpts());
    connector.tableOperations().flush(TEST_TABLE, null, null, true);
    vopts.setTableName(TEST_TABLE);
    VerifyIngest.verifyIngest(connector, vopts, new ScannerOpts());
    log.debug("waiting for balancing, up to ~5 minutes to allow for migration cleanup.");
    final long startTime = System.currentTimeMillis();
    long currentWait = 10 * 1000;
    boolean balancingWorked = false;
    Credentials creds = new Credentials(getAdminPrincipal(), getAdminToken());
    while (!balancingWorked && (System.currentTimeMillis() - startTime) < ((5 * 60 + 15) * 1000)) {
        Thread.sleep(currentWait);
        currentWait *= 2;
        log.debug("fetch the list of tablets assigned to each tserver.");
        MasterClientService.Iface client = null;
        MasterMonitorInfo stats = null;
        Instance instance = new ZooKeeperInstance(cluster.getClientConfig());
        while (true) {
            try {
                client = MasterClient.getConnectionWithRetry(new ClientContext(instance, creds, cluster.getClientConfig()));
                stats = client.getMasterStats(Tracer.traceInfo(), creds.toThrift(instance));
                break;
            } catch (ThriftSecurityException exception) {
                throw new AccumuloSecurityException(exception);
            } catch (ThriftNotActiveServiceException e) {
                // Let it loop, fetching a new location
                log.debug("Contacted a Master which is no longer active, retrying");
                sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
            } catch (TException exception) {
                throw new AccumuloException(exception);
            } finally {
                if (client != null) {
                    MasterClient.close(client);
                }
            }
        }
        if (stats.getTServerInfoSize() < 2) {
            log.debug("we need >= 2 servers. sleeping for {}ms", currentWait);
            continue;
        }
        if (stats.getUnassignedTablets() != 0) {
            log.debug("We shouldn't have unassigned tablets. sleeping for {}ms", currentWait);
            continue;
        }
        long[] tabletsPerServer = new long[stats.getTServerInfoSize()];
        Arrays.fill(tabletsPerServer, 0l);
        for (int i = 0; i < stats.getTServerInfoSize(); i++) {
            for (Map.Entry<String, TableInfo> entry : stats.getTServerInfo().get(i).getTableMap().entrySet()) {
                tabletsPerServer[i] += entry.getValue().getTablets();
            }
        }
        if (tabletsPerServer[0] <= 10) {
            log.debug("We should have > 10 tablets. sleeping for {}ms", currentWait);
            continue;
        }
        long min = NumberUtils.min(tabletsPerServer), max = NumberUtils.max(tabletsPerServer);
        log.debug("Min={}, Max={}", min, max);
        if ((min / ((double) max)) < 0.5) {
            log.debug("ratio of min to max tablets per server should be roughly even. sleeping for {}ms", currentWait);
            continue;
        }
        balancingWorked = true;
    }
    Assert.assertTrue("did not properly balance", balancingWorked);
}
Also used : TException(org.apache.thrift.TException) MasterMonitorInfo(org.apache.accumulo.core.master.thrift.MasterMonitorInfo) ThriftNotActiveServiceException(org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException) Instance(org.apache.accumulo.core.client.Instance) ZooKeeperInstance(org.apache.accumulo.core.client.ZooKeeperInstance) ZooKeeperInstance(org.apache.accumulo.core.client.ZooKeeperInstance) TestIngest(org.apache.accumulo.test.TestIngest) VerifyIngest(org.apache.accumulo.test.VerifyIngest) BatchWriterOpts(org.apache.accumulo.core.cli.BatchWriterOpts) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) AccumuloException(org.apache.accumulo.core.client.AccumuloException) ScannerOpts(org.apache.accumulo.core.cli.ScannerOpts) BatchWriterOpts(org.apache.accumulo.core.cli.BatchWriterOpts) ClientContext(org.apache.accumulo.core.client.impl.ClientContext) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) ScannerOpts(org.apache.accumulo.core.cli.ScannerOpts) MasterClientService(org.apache.accumulo.core.master.thrift.MasterClientService) Map(java.util.Map) ClientConfiguration(org.apache.accumulo.core.client.ClientConfiguration) Credentials(org.apache.accumulo.core.client.impl.Credentials) Test(org.junit.Test)

Example 3 with ThriftNotActiveServiceException

use of org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException in project accumulo by apache.

the class TableOperationsImpl method _flush.

private void _flush(Table.ID tableId, Text start, Text end, boolean wait) throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
    try {
        long flushID;
        while (true) {
            MasterClientService.Iface client = null;
            try {
                client = MasterClient.getConnectionWithRetry(context);
                flushID = client.initiateFlush(Tracer.traceInfo(), context.rpcCreds(), tableId.canonicalID());
                break;
            } catch (TTransportException tte) {
                log.debug("Failed to call initiateFlush, retrying ... ", tte);
                sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
            } catch (ThriftNotActiveServiceException e) {
                // Let it loop, fetching a new location
                log.debug("Contacted a Master which is no longer active, retrying");
                sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
            } finally {
                MasterClient.close(client);
            }
        }
        while (true) {
            MasterClientService.Iface client = null;
            try {
                client = MasterClient.getConnectionWithRetry(context);
                client.waitForFlush(Tracer.traceInfo(), context.rpcCreds(), tableId.canonicalID(), TextUtil.getByteBuffer(start), TextUtil.getByteBuffer(end), flushID, wait ? Long.MAX_VALUE : 1);
                break;
            } catch (TTransportException tte) {
                log.debug("Failed to call initiateFlush, retrying ... ", tte);
                sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
            } catch (ThriftNotActiveServiceException e) {
                // Let it loop, fetching a new location
                log.debug("Contacted a Master which is no longer active, retrying");
                sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
            } finally {
                MasterClient.close(client);
            }
        }
    } catch (ThriftSecurityException e) {
        switch(e.getCode()) {
            case TABLE_DOESNT_EXIST:
                throw new TableNotFoundException(tableId.canonicalID(), null, e.getMessage(), e);
            default:
                log.debug("flush security exception on table id {}", tableId);
                throw new AccumuloSecurityException(e.user, e.code, e);
        }
    } catch (ThriftTableOperationException e) {
        switch(e.getType()) {
            case NOTFOUND:
                throw new TableNotFoundException(e);
            default:
                throw new AccumuloException(e.description, e);
        }
    } catch (Exception e) {
        throw new AccumuloException(e);
    }
}
Also used : TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) ThriftNotActiveServiceException(org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException) MasterClientService(org.apache.accumulo.core.master.thrift.MasterClientService) TTransportException(org.apache.thrift.transport.TTransportException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) ThriftTableOperationException(org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) TableOfflineException(org.apache.accumulo.core.client.TableOfflineException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) NamespaceNotFoundException(org.apache.accumulo.core.client.NamespaceNotFoundException) ThriftNotActiveServiceException(org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) TableExistsException(org.apache.accumulo.core.client.TableExistsException) TableDeletedException(org.apache.accumulo.core.client.TableDeletedException) TException(org.apache.thrift.TException) IOException(java.io.IOException) ThriftTableOperationException(org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException) TTransportException(org.apache.thrift.transport.TTransportException) NamespaceExistsException(org.apache.accumulo.core.client.NamespaceExistsException) FileNotFoundException(java.io.FileNotFoundException) NotServingTabletException(org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) TApplicationException(org.apache.thrift.TApplicationException) AccumuloException(org.apache.accumulo.core.client.AccumuloException)

Example 4 with ThriftNotActiveServiceException

use of org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException in project accumulo by apache.

the class MasterClient method executeGeneric.

public static void executeGeneric(ClientContext context, ClientExec<MasterClientService.Client> exec) throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
    MasterClientService.Client client = null;
    while (true) {
        try {
            client = getConnectionWithRetry(context);
            exec.execute(client);
            break;
        } catch (TTransportException tte) {
            log.debug("MasterClient request failed, retrying ... ", tte);
            sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
        } catch (ThriftSecurityException e) {
            throw new AccumuloSecurityException(e.user, e.code, e);
        } catch (AccumuloException e) {
            throw e;
        } catch (ThriftTableOperationException e) {
            switch(e.getType()) {
                case NAMESPACE_NOTFOUND:
                    throw new TableNotFoundException(e.getTableName(), new NamespaceNotFoundException(e));
                case NOTFOUND:
                    throw new TableNotFoundException(e);
                default:
                    throw new AccumuloException(e);
            }
        } catch (ThriftNotActiveServiceException e) {
            // Let it loop, fetching a new location
            log.debug("Contacted a Master which is no longer active, re-creating the connection to the active Master");
        } catch (Exception e) {
            throw new AccumuloException(e);
        } finally {
            if (client != null)
                close(client);
        }
    }
}
Also used : AccumuloException(org.apache.accumulo.core.client.AccumuloException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) ThriftNotActiveServiceException(org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException) MasterClientService(org.apache.accumulo.core.master.thrift.MasterClientService) TTransportException(org.apache.thrift.transport.TTransportException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) ThriftTableOperationException(org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) NamespaceNotFoundException(org.apache.accumulo.core.client.NamespaceNotFoundException) TTransportException(org.apache.thrift.transport.TTransportException) ThriftNotActiveServiceException(org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException) UnknownHostException(java.net.UnknownHostException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) NamespaceNotFoundException(org.apache.accumulo.core.client.NamespaceNotFoundException) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) ThriftTableOperationException(org.apache.accumulo.core.client.impl.thrift.ThriftTableOperationException)

Example 5 with ThriftNotActiveServiceException

use of org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException in project accumulo by apache.

the class GetMasterStats method main.

public static void main(String[] args) throws Exception {
    MasterClientService.Iface client = null;
    MasterMonitorInfo stats = null;
    Instance instance = HdfsZooInstance.getInstance();
    AccumuloServerContext context = new AccumuloServerContext(instance, new ServerConfigurationFactory(instance));
    while (true) {
        try {
            client = MasterClient.getConnectionWithRetry(context);
            stats = client.getMasterStats(Tracer.traceInfo(), context.rpcCreds());
            break;
        } catch (ThriftNotActiveServiceException e) {
            // Let it loop, fetching a new location
            sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
        } finally {
            if (client != null)
                MasterClient.close(client);
        }
    }
    out(0, "State: " + stats.state.name());
    out(0, "Goal State: " + stats.goalState.name());
    if (stats.serversShuttingDown != null && stats.serversShuttingDown.size() > 0) {
        out(0, "Servers to shutdown");
        for (String server : stats.serversShuttingDown) {
            out(1, "%s", server);
        }
    }
    out(0, "Unassigned tablets: %d", stats.unassignedTablets);
    if (stats.badTServers != null && stats.badTServers.size() > 0) {
        out(0, "Bad servers");
        for (Entry<String, Byte> entry : stats.badTServers.entrySet()) {
            out(1, "%s: %d", entry.getKey(), (int) entry.getValue());
        }
    }
    out(0, "Dead tablet servers count: %s", stats.deadTabletServers.size());
    for (DeadServer dead : stats.deadTabletServers) {
        out(1, "Dead tablet server: %s", dead.server);
        out(2, "Last report: %s", new SimpleDateFormat().format(new Date(dead.lastStatus)));
        out(2, "Cause: %s", dead.status);
    }
    out(0, "Bulk imports: %s", stats.bulkImports.size());
    for (BulkImportStatus bulk : stats.bulkImports) {
        out(1, "Import directory: %s", bulk.filename);
        out(2, "Bulk state %s", bulk.state);
        out(2, "Bulk start %s", bulk.startTime);
    }
    if (stats.tableMap != null && stats.tableMap.size() > 0) {
        out(0, "Tables");
        for (Entry<String, TableInfo> entry : stats.tableMap.entrySet()) {
            TableInfo v = entry.getValue();
            out(1, "%s", entry.getKey());
            out(2, "Records: %d", v.recs);
            out(2, "Records in Memory: %d", v.recsInMemory);
            out(2, "Tablets: %d", v.tablets);
            out(2, "Online Tablets: %d", v.onlineTablets);
            out(2, "Ingest Rate: %.2f", v.ingestRate);
            out(2, "Query Rate: %.2f", v.queryRate);
        }
    }
    if (stats.tServerInfo != null && stats.tServerInfo.size() > 0) {
        out(0, "Tablet Servers");
        long now = System.currentTimeMillis();
        for (TabletServerStatus server : stats.tServerInfo) {
            TableInfo summary = TableInfoUtil.summarizeTableStats(server);
            out(1, "Name: %s", server.name);
            out(2, "Ingest: %.2f", summary.ingestRate);
            out(2, "Last Contact: %s", server.lastContact);
            out(2, "OS Load Average: %.2f", server.osLoad);
            out(2, "Queries: %.2f", summary.queryRate);
            out(2, "Time Difference: %.1f", ((now - server.lastContact) / 1000.));
            out(2, "Total Records: %d", summary.recs);
            out(2, "Lookups: %d", server.lookups);
            if (server.holdTime > 0)
                out(2, "Hold Time: %d", server.holdTime);
            if (server.tableMap != null && server.tableMap.size() > 0) {
                out(2, "Tables");
                for (Entry<String, TableInfo> status : server.tableMap.entrySet()) {
                    TableInfo info = status.getValue();
                    out(3, "Table: %s", status.getKey());
                    out(4, "Tablets: %d", info.onlineTablets);
                    out(4, "Records: %d", info.recs);
                    out(4, "Records in Memory: %d", info.recsInMemory);
                    out(4, "Ingest: %.2f", info.ingestRate);
                    out(4, "Queries: %.2f", info.queryRate);
                    out(4, "Major Compacting: %d", info.majors == null ? 0 : info.majors.running);
                    out(4, "Queued for Major Compaction: %d", info.majors == null ? 0 : info.majors.queued);
                    out(4, "Minor Compacting: %d", info.minors == null ? 0 : info.minors.running);
                    out(4, "Queued for Minor Compaction: %d", info.minors == null ? 0 : info.minors.queued);
                }
            }
            out(2, "Recoveries: %d", server.logSorts.size());
            for (RecoveryStatus sort : server.logSorts) {
                out(3, "File: %s", sort.name);
                out(3, "Progress: %.2f%%", sort.progress * 100);
                out(3, "Time running: %s", sort.runtime / 1000.);
            }
            out(3, "Bulk imports: %s", stats.bulkImports.size());
            for (BulkImportStatus bulk : stats.bulkImports) {
                out(4, "Import file: %s", bulk.filename);
                out(5, "Bulk state %s", bulk.state);
                out(5, "Bulk start %s", bulk.startTime);
            }
        }
    }
}
Also used : MasterMonitorInfo(org.apache.accumulo.core.master.thrift.MasterMonitorInfo) AccumuloServerContext(org.apache.accumulo.server.AccumuloServerContext) ThriftNotActiveServiceException(org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException) Instance(org.apache.accumulo.core.client.Instance) HdfsZooInstance(org.apache.accumulo.server.client.HdfsZooInstance) ServerConfigurationFactory(org.apache.accumulo.server.conf.ServerConfigurationFactory) DeadServer(org.apache.accumulo.core.master.thrift.DeadServer) Date(java.util.Date) BulkImportStatus(org.apache.accumulo.core.master.thrift.BulkImportStatus) MasterClientService(org.apache.accumulo.core.master.thrift.MasterClientService) TableInfo(org.apache.accumulo.core.master.thrift.TableInfo) RecoveryStatus(org.apache.accumulo.core.master.thrift.RecoveryStatus) SimpleDateFormat(java.text.SimpleDateFormat) TabletServerStatus(org.apache.accumulo.core.master.thrift.TabletServerStatus)

Aggregations

ThriftNotActiveServiceException (org.apache.accumulo.core.client.impl.thrift.ThriftNotActiveServiceException)11 MasterClientService (org.apache.accumulo.core.master.thrift.MasterClientService)9 ClientContext (org.apache.accumulo.core.client.impl.ClientContext)7 Credentials (org.apache.accumulo.core.client.impl.Credentials)7 MasterMonitorInfo (org.apache.accumulo.core.master.thrift.MasterMonitorInfo)7 TableInfo (org.apache.accumulo.core.master.thrift.TableInfo)6 PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken)5 TabletServerStatus (org.apache.accumulo.core.master.thrift.TabletServerStatus)5 AccumuloException (org.apache.accumulo.core.client.AccumuloException)4 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)4 Instance (org.apache.accumulo.core.client.Instance)4 ThriftSecurityException (org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException)4 BatchWriterOpts (org.apache.accumulo.core.cli.BatchWriterOpts)3 Connector (org.apache.accumulo.core.client.Connector)3 TestIngest (org.apache.accumulo.test.TestIngest)3 TException (org.apache.thrift.TException)3 Test (org.junit.Test)3 ArrayList (java.util.ArrayList)2 ClientConfiguration (org.apache.accumulo.core.client.ClientConfiguration)2 NamespaceNotFoundException (org.apache.accumulo.core.client.NamespaceNotFoundException)2