Example 81 with TableId

Use of org.apache.accumulo.core.data.TableId in project accumulo by apache.

The class IteratorEnvIT, method testEnv.

/**
 * Test that the environment methods return what is expected.
 */
private static void testEnv(IteratorScope scope, Map<String, String> opts, IteratorEnvironment env) {
    TableId expectedTableId = TableId.of(opts.get("expected.table.id"));
    // verify getServiceEnv() and getPluginEnv() are the same objects,
    // so further checks only need to use getPluginEnv()
    @SuppressWarnings("deprecation") ServiceEnvironment serviceEnv = env.getServiceEnv();
    PluginEnvironment pluginEnv = env.getPluginEnv();
    if (serviceEnv != pluginEnv)
        throw new RuntimeException("Test failed - assertSame(getServiceEnv(),getPluginEnv())");
    // verify property exists on the table config (deprecated and new),
    // with and without custom prefix, but not in the system config
    @SuppressWarnings("deprecation") String accTableConf = env.getConfig().get("table.custom.iterator.env.test");
    if (!"value1".equals(accTableConf))
        throw new RuntimeException("Test failed - Expected table property not found in getConfig().");
    var tableConf = pluginEnv.getConfiguration(env.getTableId());
    if (!"value1".equals(tableConf.get("table.custom.iterator.env.test")))
        throw new RuntimeException("Test failed - Expected table property not found in table conf.");
    if (!"value1".equals(tableConf.getTableCustom("iterator.env.test")))
        throw new RuntimeException("Test failed - Expected table property not found in table conf.");
    var systemConf = pluginEnv.getConfiguration();
    if (systemConf.get("table.custom.iterator.env.test") != null)
        throw new RuntimeException("Test failed - Unexpected table property found in system conf.");
    // check other environment settings
    if (!scope.equals(env.getIteratorScope()))
        throw new RuntimeException("Test failed - Error getting iterator scope");
    if (env.isSamplingEnabled())
        throw new RuntimeException("Test failed - isSamplingEnabled returned true, expected false");
    if (!expectedTableId.equals(env.getTableId()))
        throw new RuntimeException("Test failed - Error getting Table ID");
}
Also used : TableId(org.apache.accumulo.core.data.TableId) PluginEnvironment(org.apache.accumulo.core.client.PluginEnvironment) ServiceEnvironment(org.apache.accumulo.core.spi.common.ServiceEnvironment)
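
For context, a minimal sketch of how the expected table ID might be handed to such an iterator as an option. The option key "expected.table.id" matches what testEnv reads above, but the helper name, table name, and iterator class com.example.EnvCheckIterator are illustrative assumptions, not part of the original test.

static void attachEnvTestIterator(AccumuloClient client, String tableName) throws Exception {
    // Resolve the table name to its TableId; tableIdMap() maps table names to canonical id strings.
    TableId tableId = TableId.of(client.tableOperations().tableIdMap().get(tableName));
    // com.example.EnvCheckIterator is a hypothetical iterator whose init() would call testEnv.
    IteratorSetting setting = new IteratorSetting(100, "envtest", "com.example.EnvCheckIterator");
    setting.addOption("expected.table.id", tableId.canonical());
    client.tableOperations().attachIterator(tableName, setting);
}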

Example 82 with TableId

Use of org.apache.accumulo.core.data.TableId in project accumulo by apache.

The class AbstractInputFormat, method getSplits.

/**
 * Gets the splits of the tables that have been set on the job by reading the metadata table for
 * the specified ranges.
 *
 * @return the splits from the tables based on the ranges.
 * @throws java.io.IOException
 *           if a table set on the job doesn't exist or an error occurs initializing the tablet
 *           locator
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Level logLevel = getLogLevel(job);
    log.setLevel(logLevel);
    validateOptions(job);
    LinkedList<InputSplit> splits = new LinkedList<>();
    Map<String, org.apache.accumulo.core.client.mapreduce.InputTableConfig> tableConfigs = getInputTableConfigs(job);
    for (Map.Entry<String, org.apache.accumulo.core.client.mapreduce.InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
        String tableName = tableConfigEntry.getKey();
        org.apache.accumulo.core.client.mapreduce.InputTableConfig tableConfig = tableConfigEntry.getValue();
        ClientContext client;
        try {
            client = InputConfigurator.client(CLASS, job);
        } catch (AccumuloException | AccumuloSecurityException e) {
            throw new IOException(e);
        }
        TableId tableId;
        // resolve table name to id once, and use id from this point forward
        try {
            tableId = client.getTableId(tableName);
        } catch (TableNotFoundException e) {
            throw new IOException(e);
        }
        boolean batchScan = InputConfigurator.isBatchScan(CLASS, job);
        boolean supportBatchScan = !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
        if (batchScan && !supportBatchScan)
            throw new IllegalArgumentException("BatchScanner optimization not available for offline" + " scan, isolated, or local iterators");
        boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
        if (batchScan && !autoAdjust)
            throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
        List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
        if (ranges.isEmpty()) {
            ranges = new ArrayList<>(1);
            ranges.add(new Range());
        }
        // get the metadata information for these ranges
        Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
        TabletLocator tl;
        try {
            if (tableConfig.isOfflineScan()) {
                binnedRanges = binOfflineTable(job, tableId, ranges);
                while (binnedRanges == null) {
                    // Some tablets were still online, try again
                    // sleep randomly between 100 and 200 ms
                    sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                    binnedRanges = binOfflineTable(job, tableId, ranges);
                }
            } else {
                tl = TabletLocator.getLocator(client, tableId);
                // it's possible that the cache could contain complete but stale information
                // about a table's tablets, so clear it
                tl.invalidateCache();
                while (!tl.binRanges(client, ranges, binnedRanges).isEmpty()) {
                    client.requireNotDeleted(tableId);
                    client.requireNotOffline(tableId, tableName);
                    binnedRanges.clear();
                    log.warn("Unable to locate bins for specified ranges. Retrying.");
                    // sleep randomly between 100 and 200 ms
                    sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
                    tl.invalidateCache();
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
        // The code below either emits one split per (range, location) pair after clipping
        // ranges to tablets (when auto-adjusting), or collects every location for each
        // range and emits one split per range afterwards.
        // Map from Range to its list of locations; only used when ranges are not auto-adjusted
        HashMap<Range, ArrayList<String>> splitsToAdd = null;
        if (!autoAdjust)
            splitsToAdd = new HashMap<>();
        HashMap<String, String> hostNameCache = new HashMap<>();
        for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
            String ip = tserverBin.getKey().split(":", 2)[0];
            String location = hostNameCache.get(ip);
            if (location == null) {
                InetAddress inetAddress = InetAddress.getByName(ip);
                location = inetAddress.getCanonicalHostName();
                hostNameCache.put(ip, location);
            }
            for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
                Range ke = extentRanges.getKey().toDataRange();
                if (batchScan) {
                    // group ranges by tablet to be read by a BatchScanner
                    ArrayList<Range> clippedRanges = new ArrayList<>();
                    for (Range r : extentRanges.getValue()) clippedRanges.add(ke.clip(r));
                    org.apache.accumulo.core.clientImpl.mapred.BatchInputSplit split = new org.apache.accumulo.core.clientImpl.mapred.BatchInputSplit(tableName, tableId, clippedRanges, new String[] { location });
                    org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig, logLevel);
                    splits.add(split);
                } else {
                    // not grouping by tablet
                    for (Range r : extentRanges.getValue()) {
                        if (autoAdjust) {
                            // divide ranges into smaller ranges, based on the tablets
                            RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(), ke.clip(r), new String[] { location });
                            org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig, logLevel);
                            split.setOffline(tableConfig.isOfflineScan());
                            split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                            split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                            splits.add(split);
                        } else {
                            // don't divide ranges
                            ArrayList<String> locations = splitsToAdd.get(r);
                            if (locations == null)
                                locations = new ArrayList<>(1);
                            locations.add(location);
                            splitsToAdd.put(r, locations);
                        }
                    }
                }
            }
        }
        if (!autoAdjust)
            for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
                RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(), entry.getKey(), entry.getValue().toArray(new String[0]));
                org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig, logLevel);
                split.setOffline(tableConfig.isOfflineScan());
                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
                splits.add(split);
            }
    }
    return splits.toArray(new InputSplit[splits.size()]);
}
Also used : TableId(org.apache.accumulo.core.data.TableId) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) List(java.util.List) LinkedList(java.util.LinkedList) InputSplit(org.apache.hadoop.mapred.InputSplit) AccumuloException(org.apache.accumulo.core.client.AccumuloException) ClientContext(org.apache.accumulo.core.clientImpl.ClientContext) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) TabletLocator(org.apache.accumulo.core.clientImpl.TabletLocator) Level(org.apache.log4j.Level) Map(java.util.Map) InetAddress(java.net.InetAddress)
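
A hedged sketch of the job-side configuration this getSplits implementation consumes, using the deprecated mapred AccumuloInputFormat configurators. Connection setup is elided, and the table name and range bounds are illustrative; note the constraints enforced above: the BatchScanner optimization requires auto-adjusted ranges and is unavailable for offline, isolated, or local-iterator scans.

static InputSplit[] configureAndGetSplits() throws IOException {
    JobConf job = new JobConf();
    // Connector and instance configuration elided; see setConnectorInfo in AbstractInputFormat.
    AccumuloInputFormat.setInputTableName(job, "mytable");
    AccumuloInputFormat.setRanges(job, Collections.singleton(new Range("a", "m")));
    // getSplits above rejects batch scan unless ranges are auto-adjusted,
    // and rejects it entirely for offline, isolated, or local-iterator scans.
    AccumuloInputFormat.setAutoAdjustRanges(job, true);
    AccumuloInputFormat.setBatchScan(job, true);
    return new AccumuloInputFormat().getSplits(job, 0);
}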

Example 83 with TableId

Use of org.apache.accumulo.core.data.TableId in project accumulo by apache.

The class InputConfigurator, method binOffline.

public static Map<String, Map<KeyExtent, List<Range>>> binOffline(TableId tableId, List<Range> ranges, ClientContext context) throws AccumuloException, TableNotFoundException {
    Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
    if (context.getTableState(tableId) != TableState.OFFLINE) {
        context.clearTableListCache();
        if (context.getTableState(tableId) != TableState.OFFLINE) {
            throw new AccumuloException("Table is online tableId:" + tableId + " cannot scan table in offline mode ");
        }
    }
    for (Range range : ranges) {
        Text startRow;
        if (range.getStartKey() != null)
            startRow = range.getStartKey().getRow();
        else
            startRow = new Text();
        Range metadataRange = new Range(new KeyExtent(tableId, startRow, null).toMetaRow(), true, null, false);
        Scanner scanner = context.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
        TabletColumnFamily.PREV_ROW_COLUMN.fetch(scanner);
        scanner.fetchColumnFamily(LastLocationColumnFamily.NAME);
        scanner.fetchColumnFamily(CurrentLocationColumnFamily.NAME);
        scanner.fetchColumnFamily(FutureLocationColumnFamily.NAME);
        scanner.setRange(metadataRange);
        RowIterator rowIter = new RowIterator(scanner);
        KeyExtent lastExtent = null;
        while (rowIter.hasNext()) {
            Iterator<Map.Entry<Key, Value>> row = rowIter.next();
            String last = "";
            KeyExtent extent = null;
            String location = null;
            while (row.hasNext()) {
                Map.Entry<Key, Value> entry = row.next();
                Key key = entry.getKey();
                if (key.getColumnFamily().equals(LastLocationColumnFamily.NAME)) {
                    last = entry.getValue().toString();
                }
                if (key.getColumnFamily().equals(CurrentLocationColumnFamily.NAME) || key.getColumnFamily().equals(FutureLocationColumnFamily.NAME)) {
                    location = entry.getValue().toString();
                }
                if (TabletColumnFamily.PREV_ROW_COLUMN.hasColumns(key)) {
                    extent = KeyExtent.fromMetaPrevRow(entry);
                }
            }
            if (location != null)
                return null;
            if (!extent.tableId().equals(tableId)) {
                throw new AccumuloException("Saw unexpected table Id " + tableId + " " + extent);
            }
            if (lastExtent != null && !extent.isPreviousExtent(lastExtent)) {
                throw new AccumuloException(" " + lastExtent + " is not previous extent " + extent);
            }
            binnedRanges.computeIfAbsent(last, k -> new HashMap<>()).computeIfAbsent(extent, k -> new ArrayList<>()).add(range);
            if (extent.endRow() == null || range.afterEndKey(new Key(extent.endRow()).followingKey(PartialKey.ROW))) {
                break;
            }
            lastExtent = extent;
        }
    }
    return binnedRanges;
}
Also used : TableId(org.apache.accumulo.core.data.TableId) Text(org.apache.hadoop.io.Text) MetadataTable(org.apache.accumulo.core.metadata.MetadataTable) Writable(org.apache.hadoop.io.Writable) TextUtil(org.apache.accumulo.core.util.TextUtil) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) BatchScanner(org.apache.accumulo.core.client.BatchScanner) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) NamespacePermission(org.apache.accumulo.core.security.NamespacePermission) ByteArrayInputStream(java.io.ByteArrayInputStream) DataOutputStream(java.io.DataOutputStream) CurrentLocationColumnFamily(org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.CurrentLocationColumnFamily) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) Value(org.apache.accumulo.core.data.Value) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) TableState(org.apache.accumulo.core.manager.state.tables.TableState) ClientContext(org.apache.accumulo.core.clientImpl.ClientContext) Collection(java.util.Collection) Set(java.util.Set) AccumuloClient(org.apache.accumulo.core.client.AccumuloClient) Base64(java.util.Base64) List(java.util.List) Pair(org.apache.accumulo.core.util.Pair) ClientSideIteratorScanner(org.apache.accumulo.core.client.ClientSideIteratorScanner) IsolatedScanner(org.apache.accumulo.core.client.IsolatedScanner) Scanner(org.apache.accumulo.core.client.Scanner) DataInputStream(java.io.DataInputStream) SamplerConfigurationImpl(org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl) ByteArrayOutputStream(java.io.ByteArrayOutputStream) HashMap(java.util.HashMap) MapWritable(org.apache.hadoop.io.MapWritable) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) FutureLocationColumnFamily(org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.FutureLocationColumnFamily) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LastLocationColumnFamily(org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LastLocationColumnFamily) TablePermission(org.apache.accumulo.core.security.TablePermission) StringUtils(org.apache.hadoop.util.StringUtils) StringTokenizer(java.util.StringTokenizer) Objects.requireNonNull(java.util.Objects.requireNonNull) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Key(org.apache.accumulo.core.data.Key) Iterator(java.util.Iterator) UTF_8(java.nio.charset.StandardCharsets.UTF_8) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) IOException(java.io.IOException) Authorizations(org.apache.accumulo.core.security.Authorizations) Maps(com.google.common.collect.Maps) AccumuloException(org.apache.accumulo.core.client.AccumuloException) Range(org.apache.accumulo.core.data.Range) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) TabletColumnFamily(org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily) PartialKey(org.apache.accumulo.core.data.PartialKey) Collections(java.util.Collections) RowIterator(org.apache.accumulo.core.client.RowIterator)
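
A hedged usage sketch of binOffline: a null return means some tablet still had a current or future location (i.e., was still online), so a caller would take the table offline first and retry until the metadata shows no locations. The helper name, table name, retry pause, and full-table range are illustrative; the ClientContext is assumed to be supplied by the caller, as in the original API.

static Map<String, Map<KeyExtent, List<Range>>> binWhenOffline(ClientContext context, String tableName) throws Exception {
    // binOffline throws if the table is online, so take it offline and wait for tablets to unload.
    context.tableOperations().offline(tableName, true);
    TableId tableId = TableId.of(context.tableOperations().tableIdMap().get(tableName));
    List<Range> ranges = List.of(new Range()); // full-table range, for illustration
    Map<String, Map<KeyExtent, List<Range>>> binned = InputConfigurator.binOffline(tableId, ranges, context);
    while (binned == null) {
        // null means some tablet still had a current or future location; pause and retry
        Thread.sleep(100);
        binned = InputConfigurator.binOffline(tableId, ranges, context);
    }
    return binned;
}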

Example 84 with TableId

Use of org.apache.accumulo.core.data.TableId in project accumulo by apache.

The class KeyExtent, method readFrom.

/**
 * Create a KeyExtent from a serialized form.
 *
 * @see #writeTo(DataOutput)
 */
public static KeyExtent readFrom(DataInput in) throws IOException {
    Text tid = new Text();
    tid.readFields(in);
    TableId tableId = TableId.of(tid.toString());
    Text endRow = null;
    Text prevEndRow = null;
    boolean hasRow = in.readBoolean();
    if (hasRow) {
        endRow = new Text();
        endRow.readFields(in);
    }
    boolean hasPrevRow = in.readBoolean();
    if (hasPrevRow) {
        prevEndRow = new Text();
        prevEndRow.readFields(in);
    }
    return new KeyExtent(tableId, endRow, prevEndRow);
}
Also used : TableId(org.apache.accumulo.core.data.TableId) Text(org.apache.hadoop.io.Text) TKeyExtent(org.apache.accumulo.core.dataImpl.thrift.TKeyExtent)
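
Since readFrom mirrors writeTo, a quick hedged round-trip sketch (the table ID "2a" and the row values are illustrative):

static void roundTripExample() throws IOException {
    // Serialize an extent to a byte array, then read it back.
    KeyExtent before = new KeyExtent(TableId.of("2a"), new Text("m"), new Text("a"));
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    before.writeTo(new DataOutputStream(buffer));
    DataInput in = new DataInputStream(new ByteArrayInputStream(buffer.toByteArray()));
    KeyExtent after = KeyExtent.readFrom(in);
    // readFrom mirrors writeTo, so the two extents compare equal.
    assert before.equals(after);
}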

Example 85 with TableId

Use of org.apache.accumulo.core.data.TableId in project accumulo by apache.

The class KeyExtent, method fromMetaRow.

/**
 * Create a KeyExtent from the table ID and the end row encoded in the row field of a tablet's
 * metadata entry, along with a previous end row.
 *
 * @param encodedMetadataRow
 *          the encoded <code>tableId</code> and <code>endRow</code> from a metadata entry, as in
 *          <code>entry.getKey().getRow()</code> or from
 *          {@link TabletsSection#encodeRow(TableId, Text)}
 * @param prevEndRow
 *          the unencoded previous end row (a copy will be made)
 */
public static KeyExtent fromMetaRow(Text encodedMetadataRow, Text prevEndRow) {
    Pair<TableId, Text> tableIdAndEndRow = TabletsSection.decodeRow(encodedMetadataRow);
    TableId tableId = tableIdAndEndRow.getFirst();
    Text endRow = tableIdAndEndRow.getSecond();
    return new KeyExtent(tableId, endRow, prevEndRow);
}
Also used : TableId(org.apache.accumulo.core.data.TableId) Text(org.apache.hadoop.io.Text) TKeyExtent(org.apache.accumulo.core.dataImpl.thrift.TKeyExtent)
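
A hedged sketch pairing fromMetaRow with the encoder named in its javadoc: TabletsSection.encodeRow produces the metadata row that TabletsSection.decodeRow splits back into table ID and end row. The ID and row values are illustrative.

static KeyExtent decodeExample() {
    // Encode tableId "2a" with end row "m" into a metadata row (roughly "2a;m"),
    // then decode it back, supplying the previous end row separately.
    Text encoded = TabletsSection.encodeRow(TableId.of("2a"), new Text("m"));
    return KeyExtent.fromMetaRow(encoded, new Text("a"));
}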

Aggregations

TableId (org.apache.accumulo.core.data.TableId): 169
Text (org.apache.hadoop.io.Text): 64
HashMap (java.util.HashMap): 55
KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent): 55
ArrayList (java.util.ArrayList): 45
Test (org.junit.Test): 43
TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException): 41
Map (java.util.Map): 37
Key (org.apache.accumulo.core.data.Key): 36
AccumuloClient (org.apache.accumulo.core.client.AccumuloClient): 34
HashSet (java.util.HashSet): 31
AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException): 31
Value (org.apache.accumulo.core.data.Value): 31
IOException (java.io.IOException): 28
Scanner (org.apache.accumulo.core.client.Scanner): 28
AccumuloException (org.apache.accumulo.core.client.AccumuloException): 27
Mutation (org.apache.accumulo.core.data.Mutation): 27
List (java.util.List): 26
Range (org.apache.accumulo.core.data.Range): 24
BatchWriter (org.apache.accumulo.core.client.BatchWriter): 23