use of org.apache.accumulo.core.clientImpl.OfflineScanner in project accumulo by apache.
the class AccumuloRecordReader method initialize.
/**
 * Prepares this reader to iterate over the entries covered by one input split, using the
 * settings stored in the given job configuration.
 *
 * <p>
 * A {@code BatchInputSplit} is read through a single-threaded {@code BatchScanner}; a
 * {@code RangeInputSplit} is read through a {@code Scanner} built from the split's table id and
 * optionally wrapped for isolation and client-side iterators. For the offline, isolation,
 * local-iterator, fetched-column and sampler settings, a value carried by the split wins and the
 * table configuration is the fallback when the split returns {@code null}; execution hints also
 * fall back when the split's map is empty.
 *
 * @param inSplit
 *          the split to read; it is cast to the mapreduce {@code RangeInputSplit} implementation
 * @param job
 *          the job configuration the client, authorizations and table settings are read from
 * @throws IOException
 *           if a {@code TableNotFoundException} is raised while setting up the batch scanner, or
 *           a {@code RuntimeException} is raised while setting up the single-range scanner
 * @throws IllegalArgumentException
 *           if the split is neither a {@code BatchInputSplit} nor a {@code RangeInputSplit}
 */
public void initialize(InputSplit inSplit, JobConf job) throws IOException {
  baseSplit = (org.apache.accumulo.hadoopImpl.mapreduce.RangeInputSplit) inSplit;
  log.debug("Initializing input split: " + baseSplit);

  client = createClient(job, CLASS);
  ClientContext ctx = (ClientContext) client;
  Authorizations auths = InputConfigurator.getScanAuthorizations(CLASS, job);
  String loaderContext = InputConfigurator.getClassLoaderContext(CLASS, job);
  String tableName = baseSplit.getTableName();

  // The table may have been renamed since job setup: the old name is still good enough to look
  // up configuration, while the scanner itself goes by the table id resolved at job setup time.
  InputTableConfig tableSettings =
      InputConfigurator.getInputTableConfig(CLASS, job, baseSplit.getTableName());

  log.debug("Created client with user: " + ctx.whoami());
  log.debug("Creating scanner for table: " + tableName);
  log.debug("Authorizations are: " + auths);

  if (baseSplit instanceof BatchInputSplit) {
    BatchInputSplit batchSplit = (BatchInputSplit) baseSplit;
    BatchScanner batchScanner;
    try {
      // Note: BatchScanner will use at most one thread per tablet, currently BatchInputSplit
      // will not span tablets
      final int scanThreads = 1;
      batchScanner = ctx.createBatchScanner(baseSplit.getTableName(), auths, scanThreads);
      setupIterators(job, batchScanner, baseSplit);
      if (loaderContext != null) {
        batchScanner.setClassLoaderContext(loaderContext);
      }
    } catch (TableNotFoundException notFound) {
      throw new IOException(notFound);
    }
    batchScanner.setRanges(batchSplit.getRanges());
    scannerBase = batchScanner;
  } else if (baseSplit instanceof RangeInputSplit) {
    split = (RangeInputSplit) baseSplit;

    // Each flag comes from the split when it carries one, otherwise from the table settings.
    Boolean splitOffline = baseSplit.isOffline();
    Boolean offline = splitOffline != null ? splitOffline : tableSettings.isOfflineScan();
    Boolean splitIsolated = baseSplit.isIsolatedScan();
    Boolean isolated =
        splitIsolated != null ? splitIsolated : tableSettings.shouldUseIsolatedScanners();
    Boolean splitLocalIters = baseSplit.usesLocalIterators();
    Boolean localIters =
        splitLocalIters != null ? splitLocalIters : tableSettings.shouldUseLocalIterators();

    Scanner rangeScanner;
    try {
      rangeScanner = offline
          ? new OfflineScanner(ctx, TableId.of(baseSplit.getTableId()), auths)
          : new ScannerImpl(ctx, TableId.of(baseSplit.getTableId()), auths);
      if (isolated) {
        log.info("Creating isolated scanner");
        rangeScanner = new IsolatedScanner(rangeScanner);
      }
      if (localIters) {
        log.info("Using local iterators");
        rangeScanner = new ClientSideIteratorScanner(rangeScanner);
      }
      setupIterators(job, rangeScanner, baseSplit);
    } catch (RuntimeException failure) {
      throw new IOException(failure);
    }
    rangeScanner.setRange(baseSplit.getRange());
    scannerBase = rangeScanner;
  } else {
    throw new IllegalArgumentException("Can not initialize from " + baseSplit.getClass());
  }

  // Restrict the scanner to the requested columns / column families.
  Collection<IteratorSetting.Column> splitColumns = baseSplit.getFetchedColumns();
  Collection<IteratorSetting.Column> fetched =
      splitColumns != null ? splitColumns : tableSettings.getFetchedColumns();
  for (Pair<Text, Text> column : fetched) {
    if (column.getSecond() == null) {
      log.debug("Fetching column family " + column.getFirst());
      scannerBase.fetchColumnFamily(column.getFirst());
    } else {
      log.debug("Fetching column " + column.getFirst() + ":" + column.getSecond());
      scannerBase.fetchColumn(column.getFirst(), column.getSecond());
    }
  }

  SamplerConfiguration splitSampler = baseSplit.getSamplerConfiguration();
  SamplerConfiguration sampler =
      splitSampler != null ? splitSampler : tableSettings.getSamplerConfiguration();
  if (sampler != null) {
    scannerBase.setSamplerConfiguration(sampler);
  }

  Map<String, String> splitHints = baseSplit.getExecutionHints();
  boolean splitHasHints = splitHints != null && !splitHints.isEmpty();
  Map<String, String> hints = splitHasHints ? splitHints : tableSettings.getExecutionHints();
  if (hints != null) {
    scannerBase.setExecutionHints(hints);
  }

  scannerIterator = scannerBase.iterator();
  numKeysRead = 0;
}
use of org.apache.accumulo.core.clientImpl.OfflineScanner in project accumulo by apache.
the class AccumuloRecordReader method initialize.
/**
 * Initializes a scanner over the given input split using the configuration of this task attempt.
 *
 * <p>
 * A {@code BatchInputSplit} is read through a single-threaded {@code BatchScanner}; any other
 * split is read through a {@code Scanner} created from the split's table id and optionally
 * wrapped for isolation and client-side iterators. Offline, isolation, local-iterator,
 * fetched-column and sampler settings are taken from the split when it returns a non-null value
 * and from the table configuration otherwise; execution hints also fall back to the table
 * configuration when the split's map is empty.
 *
 * @param inSplit
 *          the split to read; it is cast to {@code RangeInputSplit}
 * @param attempt
 *          the task attempt whose configuration supplies the client, authorizations and table
 *          settings
 * @throws IOException
 *           wrapping a {@code TableNotFoundException} raised while setting up the batch scanner,
 *           or a {@code RuntimeException} raised while setting up the single-range scanner
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
  split = (RangeInputSplit) inSplit;
  log.debug("Initializing input split: " + split);
  Configuration conf = attempt.getConfiguration();
  client = createClient(attempt, this.CLASS);
  ClientContext context = (ClientContext) client;
  Authorizations authorizations = InputConfigurator.getScanAuthorizations(CLASS, conf);
  String classLoaderContext = InputConfigurator.getClassLoaderContext(CLASS, conf);
  String table = split.getTableName();
  // in case the table name changed, we can still use the previous name for terms of
  // configuration, but the scanner will use the table id resolved at job setup time
  InputTableConfig tableConfig =
      InputConfigurator.getInputTableConfig(CLASS, conf, split.getTableName());
  log.debug("Creating client with user: " + client.whoami());
  log.debug("Creating scanner for table: " + table);
  log.debug("Authorizations are: " + authorizations);

  if (split instanceof BatchInputSplit) {
    BatchInputSplit batchSplit = (BatchInputSplit) split;
    BatchScanner scanner;
    try {
      // Note: BatchScanner will use at most one thread per tablet, currently BatchInputSplit
      // will not span tablets
      int scanThreads = 1;
      scanner = context.createBatchScanner(split.getTableName(), authorizations, scanThreads);
      setupIterators(attempt, scanner, split);
      if (classLoaderContext != null) {
        scanner.setClassLoaderContext(classLoaderContext);
      }
    } catch (TableNotFoundException e) {
      // The exception travels as the cause of the IOException, so whoever handles that decides
      // how to report it; nothing is written to stderr here.
      throw new IOException(e);
    }
    scanner.setRanges(batchSplit.getRanges());
    scannerBase = scanner;
  } else {
    Scanner scanner;

    // A value carried by the split overrides the table-level configuration.
    Boolean isOffline = split.isOffline();
    if (isOffline == null) {
      isOffline = tableConfig.isOfflineScan();
    }
    Boolean isIsolated = split.isIsolatedScan();
    if (isIsolated == null) {
      isIsolated = tableConfig.shouldUseIsolatedScanners();
    }
    Boolean usesLocalIterators = split.usesLocalIterators();
    if (usesLocalIterators == null) {
      usesLocalIterators = tableConfig.shouldUseLocalIterators();
    }

    try {
      if (isOffline) {
        scanner = new OfflineScanner(context, TableId.of(split.getTableId()), authorizations);
      } else {
        // Not using public API to create scanner so that we can use table ID
        // Table ID is used in case of renames during M/R job
        scanner = new ScannerImpl(context, TableId.of(split.getTableId()), authorizations);
      }
      if (isIsolated) {
        log.info("Creating isolated scanner");
        scanner = new IsolatedScanner(scanner);
      }
      if (usesLocalIterators) {
        log.info("Using local iterators");
        scanner = new ClientSideIteratorScanner(scanner);
      }
      setupIterators(attempt, scanner, split);
    } catch (RuntimeException e) {
      throw new IOException(e);
    }
    scanner.setRange(split.getRange());
    scannerBase = scanner;
  }

  Collection<IteratorSetting.Column> columns = split.getFetchedColumns();
  if (columns == null) {
    columns = tableConfig.getFetchedColumns();
  }
  // setup a scanner within the bounds of this split
  for (Pair<Text, Text> c : columns) {
    if (c.getSecond() != null) {
      log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond());
      scannerBase.fetchColumn(c.getFirst(), c.getSecond());
    } else {
      // No qualifier was given, so the whole column family is fetched.
      log.debug("Fetching column family " + c.getFirst());
      scannerBase.fetchColumnFamily(c.getFirst());
    }
  }

  SamplerConfiguration samplerConfig = split.getSamplerConfiguration();
  if (samplerConfig == null) {
    samplerConfig = tableConfig.getSamplerConfiguration();
  }
  if (samplerConfig != null) {
    scannerBase.setSamplerConfiguration(samplerConfig);
  }

  Map<String, String> executionHints = split.getExecutionHints();
  if (executionHints == null || executionHints.isEmpty()) {
    executionHints = tableConfig.getExecutionHints();
  }
  if (executionHints != null) {
    scannerBase.setExecutionHints(executionHints);
  }

  scannerIterator = scannerBase.iterator();
  numKeysRead = 0;
}
use of org.apache.accumulo.core.clientImpl.OfflineScanner in project accumulo by apache.
the class SampleIT method newOfflineScanner.
/**
 * Builds an {@code OfflineScanner} over a fresh offline clone of a table.
 *
 * <p>
 * Any table already named {@code clone} is deleted first. The source table is then cloned under
 * that name, the clone is taken offline, and a scanner with empty authorizations is created
 * against the clone's table id.
 *
 * @param client
 *          client used for the table operations; it is cast to {@code ClientContext} for the
 *          scanner
 * @param tableName
 *          name of the table to clone
 * @param clone
 *          name to give the clone
 * @param sc
 *          sampler configuration to apply to the scanner, or {@code null} to apply none
 * @return the scanner over the offline clone
 * @throws Exception
 *           if any of the table operations fails
 */
private Scanner newOfflineScanner(AccumuloClient client, String tableName, String clone, SamplerConfiguration sc) throws Exception {
  // Drop a clone left behind under the same name so the clone call starts clean.
  boolean cloneAlreadyExists = client.tableOperations().exists(clone);
  if (cloneAlreadyExists) {
    client.tableOperations().delete(clone);
  }

  // Clone with an empty map and an empty set as the two collection arguments, then take the
  // clone offline.
  client.tableOperations().clone(tableName, clone, false,
      Collections.<String,String>emptyMap(), Collections.<String>emptySet());
  client.tableOperations().offline(clone, true);

  // The scanner is addressed by table id, so look the clone's id up by name.
  String rawCloneId = client.tableOperations().tableIdMap().get(clone);
  TableId cloneTableId = TableId.of(rawCloneId);
  OfflineScanner offlineScanner =
      new OfflineScanner((ClientContext) client, cloneTableId, Authorizations.EMPTY);

  if (sc == null) {
    return offlineScanner;
  }
  offlineScanner.setSamplerConfiguration(sc);
  return offlineScanner;
}
Aggregations