Example 41 with Table

Use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.

The class AbstractInputFormat, method getSplits.

/**
 * Gets the splits of the tables that have been set on the job by reading the metadata table for the specified ranges.
 *
 * @return the splits from the tables based on the ranges.
 * @throws java.io.IOException
 *           if a table set on the job doesn't exist or an error occurs initializing the tablet locator
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Level logLevel = getLogLevel(job);
    log.setLevel(logLevel);
    validateOptions(job);
    Random random = new Random();
    LinkedList<InputSplit> splits = new LinkedList<>();
    Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(job);
    for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
        String tableName = tableConfigEntry.getKey();
        InputTableConfig tableConfig = tableConfigEntry.getValue();
        Instance instance = getInstance(job);
        Table.ID tableId;
        // resolve table name to id once, and use id from this point forward
        if (DeprecationUtil.isMockInstance(instance)) {
            tableId = Table.ID.of("");
        } else {
            try {
                tableId = Tables.getTableId(instance, tableName);
            } catch (TableNotFoundException e) {
                throw new IOException(e);
            }
        }
        Authorizations auths = getScanAuthorizations(job);
        String principal = getPrincipal(job);
        AuthenticationToken token = getAuthenticationToken(job);
        boolean batchScan = InputConfigurator.isBatchScan(CLASS, job);
        boolean supportBatchScan = !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
        if (batchScan && !supportBatchScan)
            throw new IllegalArgumentException("BatchScanner optimization not available for offline scan, isolated, or local iterators");
        boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
        if (batchScan && !autoAdjust)
            throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
        List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
        if (ranges.isEmpty()) {
            ranges = new ArrayList<>(1);
            ranges.add(new Range());
        }
        // get the metadata information for these ranges
        Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
        TabletLocator tl;
        try {
            if (tableConfig.isOfflineScan()) {
                binnedRanges = binOfflineTable(job, tableId, ranges);
                while (binnedRanges == null) {
                    // Some tablets were still online, try again
                    // sleep randomly between 100 and 200 ms
                    sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                    binnedRanges = binOfflineTable(job, tableId, ranges);
                }
            } else {
                tl = InputConfigurator.getTabletLocator(CLASS, job, tableId);
                // it's possible that the cache could contain complete but stale information about a table's tablets, so clear it
                tl.invalidateCache();
                ClientContext context = new ClientContext(getInstance(job), new Credentials(getPrincipal(job), getAuthenticationToken(job)), getClientConfiguration(job));
                while (!tl.binRanges(context, ranges, binnedRanges).isEmpty()) {
                    if (!DeprecationUtil.isMockInstance(instance)) {
                        String tableIdStr = tableId.canonicalID();
                        if (!Tables.exists(instance, tableId))
                            throw new TableDeletedException(tableIdStr);
                        if (Tables.getTableState(instance, tableId) == TableState.OFFLINE)
                            throw new TableOfflineException(instance, tableIdStr);
                    }
                    binnedRanges.clear();
                    log.warn("Unable to locate bins for specified ranges. Retrying.");
                    // sleep randomly between 100 and 200 ms
                    sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                    tl.invalidateCache();
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
        HashMap<Range, ArrayList<String>> splitsToAdd = null;
        if (!autoAdjust)
            splitsToAdd = new HashMap<>();
        HashMap<String, String> hostNameCache = new HashMap<>();
        for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
            String ip = tserverBin.getKey().split(":", 2)[0];
            String location = hostNameCache.get(ip);
            if (location == null) {
                InetAddress inetAddress = InetAddress.getByName(ip);
                location = inetAddress.getCanonicalHostName();
                hostNameCache.put(ip, location);
            }
            for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
                Range ke = extentRanges.getKey().toDataRange();
                if (batchScan) {
                    // group ranges by tablet to be read by a BatchScanner
                    ArrayList<Range> clippedRanges = new ArrayList<>();
                    for (Range r : extentRanges.getValue())
                        clippedRanges.add(ke.clip(r));
                    BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges, new String[] { location });
                    SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
                    splits.add(split);
                } else {
                    // not grouping by tablet
                    for (Range r : extentRanges.getValue()) {
                        if (autoAdjust) {
                            // divide ranges into smaller ranges, based on the tablets
                            RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(), ke.clip(r), new String[] { location });
                            SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
                            split.setOffline(tableConfig.isOfflineScan());
                            split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                            split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                            splits.add(split);
                        } else {
                            // don't divide ranges
                            ArrayList<String> locations = splitsToAdd.get(r);
                            if (locations == null)
                                locations = new ArrayList<>(1);
                            locations.add(location);
                            splitsToAdd.put(r, locations);
                        }
                    }
                }
            }
        }
        if (!autoAdjust)
            for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
                RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(), entry.getKey(), entry.getValue().toArray(new String[0]));
                SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
                split.setOffline(tableConfig.isOfflineScan());
                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                splits.add(split);
            }
    }
    return splits.toArray(new InputSplit[splits.size()]);
}
Also used : AuthenticationToken(org.apache.accumulo.core.client.security.tokens.AuthenticationToken) TableOfflineException(org.apache.accumulo.core.client.TableOfflineException) Instance(org.apache.accumulo.core.client.Instance) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) BatchInputSplit(org.apache.accumulo.core.client.mapred.impl.BatchInputSplit) KeyExtent(org.apache.accumulo.core.data.impl.KeyExtent) TableDeletedException(org.apache.accumulo.core.client.TableDeletedException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Random(java.util.Random) List(java.util.List) LinkedList(java.util.LinkedList) InputSplit(org.apache.hadoop.mapred.InputSplit) Authorizations(org.apache.accumulo.core.security.Authorizations) Table(org.apache.accumulo.core.client.impl.Table) ClientContext(org.apache.accumulo.core.client.impl.ClientContext) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) TabletLocator(org.apache.accumulo.core.client.impl.TabletLocator) InputTableConfig(org.apache.accumulo.core.client.mapreduce.InputTableConfig) Level(org.apache.log4j.Level) Map(java.util.Map) InetAddress(java.net.InetAddress) Credentials(org.apache.accumulo.core.client.impl.Credentials)
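
For orientation, a minimal sketch (not code from the project) of driving this getSplits() through the mapred API. The principal, token, and table name below are placeholder values; instance configuration (e.g. setZooKeeperInstance) and error handling are omitted.

// Hypothetical driver: configure the job, then ask the InputFormat for splits.
static InputSplit[] planSplits() throws Exception {
    JobConf job = new JobConf();
    // placeholder credentials and table name
    AccumuloInputFormat.setConnectorInfo(job, "root", new PasswordToken("secret"));
    AccumuloInputFormat.setInputTableName(job, "mytable");
    AccumuloInputFormat.setScanAuthorizations(job, Authorizations.EMPTY);
    // a single empty Range means "whole table"; getSplits() clips it per tablet
    AccumuloInputFormat.setRanges(job, Collections.singleton(new Range()));
    return new AccumuloInputFormat().getSplits(job, 0);
}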

Example 42 with Table

Use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.

The class ClientServiceHandler, method getDiskUsage.

@Override
public List<TDiskUsage> getDiskUsage(Set<String> tables, TCredentials credentials) throws ThriftTableOperationException, ThriftSecurityException, TException {
    try {
        HashSet<Table.ID> tableIds = new HashSet<>();
        for (String table : tables) {
            // ensure that the table exists
            Table.ID tableId = checkTableId(instance, table, null);
            tableIds.add(tableId);
            Namespace.ID namespaceId = Tables.getNamespaceId(instance, tableId);
            if (!security.canScan(credentials, tableId, namespaceId))
                throw new ThriftSecurityException(credentials.getPrincipal(), SecurityErrorCode.PERMISSION_DENIED);
        }
        // use the same set of tableIds that were validated above to avoid race conditions
        Map<TreeSet<String>, Long> diskUsage = TableDiskUsage.getDiskUsage(tableIds, fs, context.getConnector());
        List<TDiskUsage> retUsages = new ArrayList<>();
        for (Map.Entry<TreeSet<String>, Long> usageItem : diskUsage.entrySet()) {
            retUsages.add(new TDiskUsage(new ArrayList<>(usageItem.getKey()), usageItem.getValue()));
        }
        return retUsages;
    } catch (AccumuloSecurityException e) {
        throw e.asThriftException();
    } catch (AccumuloException | TableNotFoundException | IOException e) {
        throw new TException(e);
    }
}
Also used : TException(org.apache.thrift.TException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) Table(org.apache.accumulo.core.client.impl.Table) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ThriftSecurityException(org.apache.accumulo.core.client.impl.thrift.ThriftSecurityException) Namespace(org.apache.accumulo.core.client.impl.Namespace) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) TreeSet(java.util.TreeSet) TDiskUsage(org.apache.accumulo.core.client.impl.thrift.TDiskUsage) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Map(java.util.Map) HashMap(java.util.HashMap) HashSet(java.util.HashSet)
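
This Thrift handler backs the public client API. A minimal client-side sketch, assuming a Connector named connector has already been obtained (checked exceptions omitted):

// TableOperations.getDiskUsage(...) routes through this handler on the server.
List<DiskUsage> usages = connector.tableOperations().getDiskUsage(Collections.singleton("mytable"));
for (DiskUsage du : usages) {
    // tables sharing files are reported together, mirroring the TreeSet keys above
    System.out.println(du.getTables() + " -> " + du.getUsage() + " bytes");
}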

Example 43 with Table

Use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.

The class RandomizeVolumes, method randomize.

public static int randomize(Connector c, String tableName) throws IOException, AccumuloSecurityException, AccumuloException, TableNotFoundException {
    final VolumeManager vm = VolumeManagerImpl.get();
    if (vm.getVolumes().size() < 2) {
        log.error("There are not enough volumes configured");
        return 1;
    }
    String tblStr = c.tableOperations().tableIdMap().get(tableName);
    if (null == tblStr) {
        log.error("Could not determine the table ID for table {}", tableName);
        return 2;
    }
    Table.ID tableId = Table.ID.of(tblStr);
    TableState tableState = TableManager.getInstance().getTableState(tableId);
    if (TableState.OFFLINE != tableState) {
        log.info("Taking {} offline", tableName);
        c.tableOperations().offline(tableName, true);
        log.info("{} offline", tableName);
    }
    SimpleThreadPool pool = new SimpleThreadPool(50, "directory maker");
    log.info("Rewriting entries for {}", tableName);
    Scanner scanner = c.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
    DIRECTORY_COLUMN.fetch(scanner);
    scanner.setRange(TabletsSection.getRange(tableId));
    BatchWriter writer = c.createBatchWriter(MetadataTable.NAME, null);
    int count = 0;
    for (Entry<Key, Value> entry : scanner) {
        String oldLocation = entry.getValue().toString();
        String directory;
        if (oldLocation.contains(":")) {
            String[] parts = oldLocation.split(Path.SEPARATOR);
            Table.ID tableIdEntry = Table.ID.of(parts[parts.length - 2]);
            if (!tableIdEntry.equals(tableId)) {
                log.error("Unexpected table id found: {}, expected {}; skipping", tableIdEntry, tableId);
                continue;
            }
            directory = parts[parts.length - 1];
        } else {
            directory = oldLocation.substring(Path.SEPARATOR.length());
        }
        Key key = entry.getKey();
        Mutation m = new Mutation(key.getRow());
        VolumeChooserEnvironment chooserEnv = new VolumeChooserEnvironment(tableId);
        final String newLocation = vm.choose(chooserEnv, ServerConstants.getBaseUris()) + Path.SEPARATOR + ServerConstants.TABLE_DIR + Path.SEPARATOR + tableId + Path.SEPARATOR + directory;
        m.put(key.getColumnFamily(), key.getColumnQualifier(), new Value(newLocation.getBytes(UTF_8)));
        if (log.isTraceEnabled()) {
            log.trace("Replacing {} with {}", oldLocation, newLocation);
        }
        writer.addMutation(m);
        pool.submit(new Runnable() {

            @Override
            public void run() {
                try {
                    vm.mkdirs(new Path(newLocation));
                } catch (IOException ex) {
                    // best effort: ignore failures creating the new directory
                }
            }
        });
        count++;
    }
    writer.close();
    pool.shutdown();
    while (!pool.isTerminated()) {
        log.trace("Waiting for mkdir() calls to finish");
        try {
            pool.awaitTermination(5, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            break;
        }
    }
    log.info("Updated {} entries for table {}", count, tableName);
    if (TableState.OFFLINE != tableState) {
        c.tableOperations().online(tableName, true);
        log.info("table {} back online", tableName);
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) VolumeManager(org.apache.accumulo.server.fs.VolumeManager) Scanner(org.apache.accumulo.core.client.Scanner) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) Table(org.apache.accumulo.core.client.impl.Table) ClientOnRequiredTable(org.apache.accumulo.core.cli.ClientOnRequiredTable) IOException(java.io.IOException) VolumeChooserEnvironment(org.apache.accumulo.server.fs.VolumeChooserEnvironment) Value(org.apache.accumulo.core.data.Value) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) SimpleThreadPool(org.apache.accumulo.core.util.SimpleThreadPool) Key(org.apache.accumulo.core.data.Key) TableState(org.apache.accumulo.core.master.state.tables.TableState)
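
The directory values read from the metadata table come in two shapes, which the branch above distinguishes: a full file-system URI and a relative path. A small worked illustration (the paths are made-up values):

// full URI form: the table id is the second-to-last path component
String oldLocation = "hdfs://nn:8020/accumulo/tables/2b/t-0001";
String[] parts = oldLocation.split(Path.SEPARATOR);
Table.ID tableIdEntry = Table.ID.of(parts[parts.length - 2]); // "2b"
String directory = parts[parts.length - 1];                   // "t-0001"
// relative form: strip the leading separator to get the directory name
String relative = "/t-0001";
String directory2 = relative.substring(Path.SEPARATOR.length()); // "t-0001"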

Example 44 with Table

Use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.

The class TableDiskUsage, method printDiskUsage.

public static void printDiskUsage(Collection<String> tableNames, VolumeManager fs, Connector conn, Printer printer, boolean humanReadable) throws TableNotFoundException, IOException {
    HashSet<Table.ID> tableIds = new HashSet<>();
    // Get table IDs for all tables requested to be 'du'
    for (String tableName : tableNames) {
        Table.ID tableId = Tables.getTableId(conn.getInstance(), tableName);
        if (tableId == null)
            throw new TableNotFoundException(null, tableName, "Table " + tableName + " not found");
        tableIds.add(tableId);
    }
    Map<TreeSet<String>, Long> usage = getDiskUsage(tableIds, fs, conn);
    String valueFormat = humanReadable ? "%9s" : "%,24d";
    for (Entry<TreeSet<String>, Long> entry : usage.entrySet()) {
        Object value = humanReadable ? NumUtil.bigNumberForSize(entry.getValue()) : entry.getValue();
        printer.print(String.format(valueFormat + " %s", value, entry.getKey()));
    }
}
Also used : TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) Table(org.apache.accumulo.core.client.impl.Table) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet)
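
The Printer argument is just an output sink. A minimal caller sketch, assuming Printer declares a single print(String) method (which its use above suggests):

// print per-table disk usage to stdout, with human-readable sizes
TableDiskUsage.printDiskUsage(tableNames, fs, conn, new Printer() {
    @Override
    public void print(String line) {
        System.out.println(line);
    }
}, true);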

Example 45 with Table

Use of org.apache.accumulo.core.client.impl.Table in project accumulo by apache.

The class ReplicationResource, method getReplicationInformation.

/**
 * Generates the replication table as a JSON object
 *
 * @return Replication list
 */
@GET
public List<ReplicationInformation> getReplicationInformation() throws AccumuloException, AccumuloSecurityException {
    final Connector conn = Monitor.getContext().getConnector();
    final TableOperations tops = conn.tableOperations();
    final Map<String, String> properties = conn.instanceOperations().getSystemConfiguration();
    final Map<String, String> peers = new HashMap<>();
    final String definedPeersPrefix = Property.REPLICATION_PEERS.getKey();
    final ReplicaSystemFactory replicaSystemFactory = new ReplicaSystemFactory();
    // Get the defined peers and what ReplicaSystem impl they're using
    for (Entry<String, String> property : properties.entrySet()) {
        String key = property.getKey();
        // Filter out cruft that we don't want
        if (key.startsWith(definedPeersPrefix) && !key.startsWith(Property.REPLICATION_PEER_USER.getKey()) && !key.startsWith(Property.REPLICATION_PEER_PASSWORD.getKey())) {
            String peerName = property.getKey().substring(definedPeersPrefix.length());
            ReplicaSystem replica;
            try {
                replica = replicaSystemFactory.get(property.getValue());
            } catch (Exception e) {
                log.warn("Could not instantiate ReplicaSystem for {} with configuration {}", property.getKey(), property.getValue(), e);
                continue;
            }
            peers.put(peerName, replica.getClass().getName());
        }
    }
    final String targetPrefix = Property.TABLE_REPLICATION_TARGET.getKey();
    // The total set of configured targets
    Set<ReplicationTarget> allConfiguredTargets = new HashSet<>();
    // Number of files per target we have to replicate
    Map<ReplicationTarget, Long> targetCounts = new HashMap<>();
    Map<String, Table.ID> tableNameToId = Tables.getNameToIdMap(conn.getInstance());
    Map<Table.ID, String> tableIdToName = invert(tableNameToId);
    for (String table : tops.list()) {
        if (MetadataTable.NAME.equals(table) || RootTable.NAME.equals(table)) {
            continue;
        }
        Table.ID localId = tableNameToId.get(table);
        if (null == localId) {
            log.trace("Could not determine ID for {}", table);
            continue;
        }
        Iterable<Entry<String, String>> propertiesForTable;
        try {
            propertiesForTable = tops.getProperties(table);
        } catch (TableNotFoundException e) {
            log.warn("Could not fetch properties for {}", table, e);
            continue;
        }
        for (Entry<String, String> prop : propertiesForTable) {
            if (prop.getKey().startsWith(targetPrefix)) {
                String peerName = prop.getKey().substring(targetPrefix.length());
                String remoteIdentifier = prop.getValue();
                ReplicationTarget target = new ReplicationTarget(peerName, remoteIdentifier, localId);
                allConfiguredTargets.add(target);
            }
        }
    }
    // Read over the queued work
    BatchScanner bs;
    try {
        bs = conn.createBatchScanner(ReplicationTable.NAME, Authorizations.EMPTY, 4);
    } catch (TableOfflineException | TableNotFoundException e) {
        log.error("Could not read replication table", e);
        return Collections.emptyList();
    }
    bs.setRanges(Collections.singleton(new Range()));
    WorkSection.limit(bs);
    try {
        Text buffer = new Text();
        for (Entry<Key, Value> entry : bs) {
            Key k = entry.getKey();
            k.getColumnQualifier(buffer);
            ReplicationTarget target = ReplicationTarget.from(buffer);
            // TODO ACCUMULO-2835 once explicit lengths are tracked, we can give size-based estimates instead of just file-based
            Long count = targetCounts.get(target);
            if (null == count) {
                targetCounts.put(target, 1L);
            } else {
                targetCounts.put(target, count + 1);
            }
        }
    } finally {
        bs.close();
    }
    List<ReplicationInformation> replicationInformation = new ArrayList<>();
    for (ReplicationTarget configuredTarget : allConfiguredTargets) {
        String tableName = tableIdToName.get(configuredTarget.getSourceTableId());
        if (null == tableName) {
            log.trace("Could not determine table name from id {}", configuredTarget.getSourceTableId());
            continue;
        }
        String replicaSystemClass = peers.get(configuredTarget.getPeerName());
        if (null == replicaSystemClass) {
            log.trace("Could not determine configured ReplicaSystem for {}", configuredTarget.getPeerName());
            continue;
        }
        Long numFiles = targetCounts.get(configuredTarget);
        replicationInformation.add(new ReplicationInformation(tableName, configuredTarget.getPeerName(), configuredTarget.getRemoteIdentifier(), replicaSystemClass, (null == numFiles) ? 0 : numFiles));
    }
    return replicationInformation;
}
Also used : Connector(org.apache.accumulo.core.client.Connector) TableOfflineException(org.apache.accumulo.core.client.TableOfflineException) HashMap(java.util.HashMap) BatchScanner(org.apache.accumulo.core.client.BatchScanner) ArrayList(java.util.ArrayList) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Entry(java.util.Map.Entry) TableOperations(org.apache.accumulo.core.client.admin.TableOperations) ReplicaSystem(org.apache.accumulo.server.replication.ReplicaSystem) ReplicaSystemFactory(org.apache.accumulo.server.replication.ReplicaSystemFactory) HashSet(java.util.HashSet) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) RootTable(org.apache.accumulo.core.metadata.RootTable) Table(org.apache.accumulo.core.client.impl.Table) ReplicationTable(org.apache.accumulo.core.replication.ReplicationTable) Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) ReplicationTarget(org.apache.accumulo.core.replication.ReplicationTarget) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key) GET(javax.ws.rs.GET)
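
The invert helper is not shown in this example; a minimal sketch of what its usage implies (swapping the keys and values of the name-to-id map):

// hypothetical reconstruction: build an id-to-name map from the name-to-id map
private static Map<Table.ID, String> invert(Map<String, Table.ID> map) {
    Map<Table.ID, String> inverted = new HashMap<>(map.size());
    for (Entry<String, Table.ID> entry : map.entrySet()) {
        inverted.put(entry.getValue(), entry.getKey());
    }
    return inverted;
}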

Aggregations

Table (org.apache.accumulo.core.client.impl.Table): 55 uses
MetadataTable (org.apache.accumulo.core.metadata.MetadataTable): 34 uses
Value (org.apache.accumulo.core.data.Value): 27 uses
Key (org.apache.accumulo.core.data.Key): 25 uses
Text (org.apache.hadoop.io.Text): 25 uses
Scanner (org.apache.accumulo.core.client.Scanner): 21 uses
KeyExtent (org.apache.accumulo.core.data.impl.KeyExtent): 21 uses
Test (org.junit.Test): 21 uses
Connector (org.apache.accumulo.core.client.Connector): 19 uses
Mutation (org.apache.accumulo.core.data.Mutation): 18 uses
ReplicationTable (org.apache.accumulo.core.replication.ReplicationTable): 18 uses
BatchWriter (org.apache.accumulo.core.client.BatchWriter): 17 uses
TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException): 17 uses
AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException): 15 uses
ArrayList (java.util.ArrayList): 14 uses
AccumuloException (org.apache.accumulo.core.client.AccumuloException): 14 uses
Path (org.apache.hadoop.fs.Path): 14 uses
HashSet (java.util.HashSet): 11 uses
BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig): 11 uses
HashMap (java.util.HashMap): 9 uses