Search in sources :

Example 1 with OfflineScanner

use of org.apache.accumulo.core.clientImpl.OfflineScanner in project accumulo by apache.

the class AccumuloRecordReader method initialize.

/**
 * Prepares this reader to scan the data described by the supplied input split.
 *
 * <p>
 * A {@code BatchInputSplit} is read with a batch scanner; any other {@code RangeInputSplit} is
 * read with a single-range scanner that may be offline, isolated and/or use client-side
 * iterators. For the offline/isolated/local-iterator flags, fetched columns, sampler
 * configuration and execution hints, a value carried by the split is used when present,
 * otherwise the value from the table's input configuration is used.
 *
 * @param inSplit
 *          the split to read; it is cast to the hadoopImpl mapreduce {@code RangeInputSplit}
 * @param job
 *          the job configuration from which the client, authorizations, class loader context
 *          and table configuration are obtained
 * @throws IOException
 *           if the table is not found while creating the batch scanner, or if a
 *           {@link RuntimeException} is raised while building the single-range scanner
 */
public void initialize(InputSplit inSplit, JobConf job) throws IOException {
    baseSplit = (org.apache.accumulo.hadoopImpl.mapreduce.RangeInputSplit) inSplit;
    log.debug("Initializing input split: " + baseSplit);

    client = createClient(job, CLASS);
    ClientContext ctx = (ClientContext) client;
    Authorizations auths = InputConfigurator.getScanAuthorizations(CLASS, job);
    String loaderContext = InputConfigurator.getClassLoaderContext(CLASS, job);
    String tableName = baseSplit.getTableName();
    // The table may have been renamed since the job was set up: the old name is still good
    // enough to look up configuration, while the scanners below rely on the table id.
    InputTableConfig config = InputConfigurator.getInputTableConfig(CLASS, job, tableName);

    log.debug("Created client with user: " + ctx.whoami());
    log.debug("Creating scanner for table: " + tableName);
    log.debug("Authorizations are: " + auths);

    if (baseSplit instanceof BatchInputSplit) {
        BatchScanner batchScanner;
        try {
            // One thread is enough: a BatchScanner uses at most one thread per tablet and a
            // BatchInputSplit currently never spans tablets.
            batchScanner = ctx.createBatchScanner(tableName, auths, 1);
            setupIterators(job, batchScanner, baseSplit);
            if (loaderContext != null) {
                batchScanner.setClassLoaderContext(loaderContext);
            }
        } catch (TableNotFoundException notFound) {
            throw new IOException(notFound);
        }
        batchScanner.setRanges(((BatchInputSplit) baseSplit).getRanges());
        scannerBase = batchScanner;
    } else if (baseSplit instanceof RangeInputSplit) {
        split = (RangeInputSplit) baseSplit;

        // Each flag: prefer the value on the split, fall back to the table configuration.
        Boolean splitOffline = baseSplit.isOffline();
        Boolean offline = (splitOffline != null) ? splitOffline : config.isOfflineScan();
        Boolean splitIsolated = baseSplit.isIsolatedScan();
        Boolean isolated = (splitIsolated != null) ? splitIsolated : config.shouldUseIsolatedScanners();
        Boolean splitLocalIters = baseSplit.usesLocalIterators();
        Boolean localIterators = (splitLocalIters != null) ? splitLocalIters : config.shouldUseLocalIterators();

        Scanner rangeScanner;
        try {
            rangeScanner = offline
                ? new OfflineScanner(ctx, TableId.of(baseSplit.getTableId()), auths)
                : new ScannerImpl(ctx, TableId.of(baseSplit.getTableId()), auths);
            if (isolated) {
                log.info("Creating isolated scanner");
                rangeScanner = new IsolatedScanner(rangeScanner);
            }
            if (localIterators) {
                log.info("Using local iterators");
                rangeScanner = new ClientSideIteratorScanner(rangeScanner);
            }
            setupIterators(job, rangeScanner, baseSplit);
        } catch (RuntimeException failure) {
            throw new IOException(failure);
        }
        rangeScanner.setRange(baseSplit.getRange());
        scannerBase = rangeScanner;
    } else {
        throw new IllegalArgumentException("Can not initialize from " + baseSplit.getClass());
    }

    // Restrict the scan to the requested columns (whole family when no qualifier is given).
    Collection<IteratorSetting.Column> fetched = baseSplit.getFetchedColumns();
    if (fetched == null) {
        fetched = config.getFetchedColumns();
    }
    for (Pair<Text, Text> column : fetched) {
        Text family = column.getFirst();
        Text qualifier = column.getSecond();
        if (qualifier == null) {
            log.debug("Fetching column family " + family);
            scannerBase.fetchColumnFamily(family);
        } else {
            log.debug("Fetching column " + family + ":" + qualifier);
            scannerBase.fetchColumn(family, qualifier);
        }
    }

    SamplerConfiguration splitSampler = baseSplit.getSamplerConfiguration();
    SamplerConfiguration sampler = (splitSampler != null) ? splitSampler : config.getSamplerConfiguration();
    if (sampler != null) {
        scannerBase.setSamplerConfiguration(sampler);
    }

    // Hints on the split win only when there is at least one of them.
    Map<String, String> hints = baseSplit.getExecutionHints();
    boolean splitHasHints = hints != null && !hints.isEmpty();
    if (!splitHasHints) {
        hints = config.getExecutionHints();
    }
    if (hints != null) {
        scannerBase.setExecutionHints(hints);
    }

    scannerIterator = scannerBase.iterator();
    numKeysRead = 0;
}
Also used : BatchScanner(org.apache.accumulo.core.client.BatchScanner) OfflineScanner(org.apache.accumulo.core.clientImpl.OfflineScanner) ClientSideIteratorScanner(org.apache.accumulo.core.client.ClientSideIteratorScanner) IsolatedScanner(org.apache.accumulo.core.client.IsolatedScanner) Scanner(org.apache.accumulo.core.client.Scanner) BatchScanner(org.apache.accumulo.core.client.BatchScanner) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) OfflineScanner(org.apache.accumulo.core.clientImpl.OfflineScanner) ClientSideIteratorScanner(org.apache.accumulo.core.client.ClientSideIteratorScanner) Authorizations(org.apache.accumulo.core.security.Authorizations) ClientContext(org.apache.accumulo.core.clientImpl.ClientContext) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) ScannerImpl(org.apache.accumulo.core.clientImpl.ScannerImpl) InputTableConfig(org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig) IsolatedScanner(org.apache.accumulo.core.client.IsolatedScanner)

Example 2 with OfflineScanner

use of org.apache.accumulo.core.clientImpl.OfflineScanner in project accumulo by apache.

the class AccumuloRecordReader method initialize.

/**
 * Initializes a scanner over the given input split using the task attempt's configuration.
 *
 * <p>
 * A {@code BatchInputSplit} is read with a batch scanner; every other split is read with a
 * single-range scanner that may be offline, isolated and/or use client-side iterators. For
 * the offline/isolated/local-iterator flags, fetched columns, sampler configuration and
 * execution hints, a value carried by the split is used when present, otherwise the value from
 * the table's input configuration is used.
 *
 * @param inSplit
 *          the split to read; it is cast to {@code RangeInputSplit}
 * @param attempt
 *          the task attempt whose configuration supplies the client, authorizations, class
 *          loader context and table configuration
 * @throws IOException
 *           if the table is not found while creating the batch scanner, or if a
 *           {@link RuntimeException} is raised while building the single-range scanner; the
 *           original exception is preserved as the cause
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
    split = (RangeInputSplit) inSplit;
    log.debug("Initializing input split: " + split);
    Configuration conf = attempt.getConfiguration();
    client = createClient(attempt, CLASS);
    ClientContext context = (ClientContext) client;
    Authorizations authorizations = InputConfigurator.getScanAuthorizations(CLASS, conf);
    String classLoaderContext = InputConfigurator.getClassLoaderContext(CLASS, conf);
    String table = split.getTableName();
    // in case the table name changed, we can still use the previous name for terms of
    // configuration, but the scanner will use the table id resolved at job setup time
    InputTableConfig tableConfig = InputConfigurator.getInputTableConfig(CLASS, conf, table);
    log.debug("Creating client with user: " + client.whoami());
    log.debug("Creating scanner for table: " + table);
    log.debug("Authorizations are: " + authorizations);
    if (split instanceof BatchInputSplit) {
        BatchInputSplit batchSplit = (BatchInputSplit) split;
        BatchScanner scanner;
        try {
            // Note: BatchScanner will use at most one thread per tablet, currently BatchInputSplit
            // will not span tablets
            int scanThreads = 1;
            scanner = context.createBatchScanner(table, authorizations, scanThreads);
            setupIterators(attempt, scanner, split);
            if (classLoaderContext != null) {
                scanner.setClassLoaderContext(classLoaderContext);
            }
        } catch (TableNotFoundException e) {
            // The cause travels with the IOException; do not also dump it to stderr.
            throw new IOException(e);
        }
        scanner.setRanges(batchSplit.getRanges());
        scannerBase = scanner;
    } else {
        Scanner scanner;
        // For each flag, a null on the split means "not set": use the table configuration.
        Boolean isOffline = split.isOffline();
        if (isOffline == null) {
            isOffline = tableConfig.isOfflineScan();
        }
        Boolean isIsolated = split.isIsolatedScan();
        if (isIsolated == null) {
            isIsolated = tableConfig.shouldUseIsolatedScanners();
        }
        Boolean usesLocalIterators = split.usesLocalIterators();
        if (usesLocalIterators == null) {
            usesLocalIterators = tableConfig.shouldUseLocalIterators();
        }
        try {
            if (isOffline) {
                scanner = new OfflineScanner(context, TableId.of(split.getTableId()), authorizations);
            } else {
                // Not using public API to create scanner so that we can use table ID
                // Table ID is used in case of renames during M/R job
                scanner = new ScannerImpl(context, TableId.of(split.getTableId()), authorizations);
            }
            if (isIsolated) {
                log.info("Creating isolated scanner");
                scanner = new IsolatedScanner(scanner);
            }
            if (usesLocalIterators) {
                log.info("Using local iterators");
                scanner = new ClientSideIteratorScanner(scanner);
            }
            setupIterators(attempt, scanner, split);
        } catch (RuntimeException e) {
            throw new IOException(e);
        }
        scanner.setRange(split.getRange());
        scannerBase = scanner;
    }
    Collection<IteratorSetting.Column> columns = split.getFetchedColumns();
    if (columns == null) {
        columns = tableConfig.getFetchedColumns();
    }
    // setup a scanner within the bounds of this split
    for (Pair<Text, Text> c : columns) {
        if (c.getSecond() != null) {
            log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
            scannerBase.fetchColumn(c.getFirst(), c.getSecond());
        } else {
            // no qualifier: fetch the whole column family
            log.debug("Fetching column family " + c.getFirst());
            scannerBase.fetchColumnFamily(c.getFirst());
        }
    }
    SamplerConfiguration samplerConfig = split.getSamplerConfiguration();
    if (samplerConfig == null) {
        samplerConfig = tableConfig.getSamplerConfiguration();
    }
    if (samplerConfig != null) {
        scannerBase.setSamplerConfiguration(samplerConfig);
    }
    // an empty hint map on the split is treated the same as no hints at all
    Map<String, String> executionHints = split.getExecutionHints();
    if (executionHints == null || executionHints.isEmpty()) {
        executionHints = tableConfig.getExecutionHints();
    }
    if (executionHints != null) {
        scannerBase.setExecutionHints(executionHints);
    }
    scannerIterator = scannerBase.iterator();
    numKeysRead = 0;
}
Also used : ClientSideIteratorScanner(org.apache.accumulo.core.client.ClientSideIteratorScanner) BatchScanner(org.apache.accumulo.core.client.BatchScanner) OfflineScanner(org.apache.accumulo.core.clientImpl.OfflineScanner) ClientSideIteratorScanner(org.apache.accumulo.core.client.ClientSideIteratorScanner) IsolatedScanner(org.apache.accumulo.core.client.IsolatedScanner) Scanner(org.apache.accumulo.core.client.Scanner) Authorizations(org.apache.accumulo.core.security.Authorizations) Configuration(org.apache.hadoop.conf.Configuration) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) ClientContext(org.apache.accumulo.core.clientImpl.ClientContext) BatchScanner(org.apache.accumulo.core.client.BatchScanner) SamplerConfiguration(org.apache.accumulo.core.client.sample.SamplerConfiguration) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) ScannerImpl(org.apache.accumulo.core.clientImpl.ScannerImpl) OfflineScanner(org.apache.accumulo.core.clientImpl.OfflineScanner) IsolatedScanner(org.apache.accumulo.core.client.IsolatedScanner)

Example 3 with OfflineScanner

use of org.apache.accumulo.core.clientImpl.OfflineScanner in project accumulo by apache.

the class SampleIT method newOfflineScanner.

/**
 * Clones {@code tableName} into {@code clone}, takes the clone offline and returns an
 * {@link OfflineScanner} over it with empty authorizations.
 *
 * <p>
 * If a table named {@code clone} already exists it is deleted first. The clone is created with
 * no property overrides and no excluded properties.
 *
 * @param client
 *          the client used for table operations; it is cast to {@code ClientContext} to build
 *          the scanner
 * @param tableName
 *          the table to clone
 * @param clone
 *          the name of the cloned table
 * @param sc
 *          sampler configuration to apply to the scanner, or {@code null} for none
 * @return a scanner over the offline clone
 * @throws Exception
 *           if any of the table operations fail
 */
private Scanner newOfflineScanner(AccumuloClient client, String tableName, String clone, SamplerConfiguration sc) throws Exception {
    // Start from a clean slate: drop any leftover clone from a previous call.
    if (client.tableOperations().exists(clone)) {
        client.tableOperations().delete(clone);
    }

    client.tableOperations().clone(tableName, clone, false, Collections.<String, String>emptyMap(),
        Collections.<String>emptySet());
    client.tableOperations().offline(clone, true);

    // The offline scanner is addressed by table id, so resolve the clone's id from its name.
    String cloneIdString = client.tableOperations().tableIdMap().get(clone);
    OfflineScanner offlineScanner =
        new OfflineScanner((ClientContext) client, TableId.of(cloneIdString), Authorizations.EMPTY);

    if (sc == null) {
        return offlineScanner;
    }
    offlineScanner.setSamplerConfiguration(sc);
    return offlineScanner;
}
Also used : TableId(org.apache.accumulo.core.data.TableId) OfflineScanner(org.apache.accumulo.core.clientImpl.OfflineScanner)

Aggregations

OfflineScanner (org.apache.accumulo.core.clientImpl.OfflineScanner)3 IOException (java.io.IOException)2 BatchScanner (org.apache.accumulo.core.client.BatchScanner)2 ClientSideIteratorScanner (org.apache.accumulo.core.client.ClientSideIteratorScanner)2 IsolatedScanner (org.apache.accumulo.core.client.IsolatedScanner)2 Scanner (org.apache.accumulo.core.client.Scanner)2 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)2 SamplerConfiguration (org.apache.accumulo.core.client.sample.SamplerConfiguration)2 ClientContext (org.apache.accumulo.core.clientImpl.ClientContext)2 ScannerImpl (org.apache.accumulo.core.clientImpl.ScannerImpl)2 Authorizations (org.apache.accumulo.core.security.Authorizations)2 Text (org.apache.hadoop.io.Text)2 TableId (org.apache.accumulo.core.data.TableId)1 InputTableConfig (org.apache.accumulo.hadoopImpl.mapreduce.InputTableConfig)1 Configuration (org.apache.hadoop.conf.Configuration)1