Use of org.apache.accumulo.core.client.mapreduce.InputTableConfig in the Apache Accumulo project.
Class InputConfigurator, method getDefaultInputTableConfig.
/**
 * Builds the {@link org.apache.accumulo.core.client.mapreduce.InputTableConfig} described by the properties that were stored through the single-table input
 * methods.
 *
 * @param implementingClass
 *          the class whose name will be used as a prefix for the property configuration key
 * @param conf
 *          the Hadoop configuration the properties are read from
 * @return an entry pairing the configured input table name with its config object, or {@code null} when no input table name is found in the configuration
 * @throws RuntimeException
 *           wrapping the {@link IOException} raised while reading the configured ranges
 * @since 1.6.0
 */
protected static Map.Entry<String, InputTableConfig> getDefaultInputTableConfig(Class<?> implementingClass, Configuration conf) {
  final String inputTable = getInputTableName(implementingClass, conf);
  if (inputTable == null) {
    // nothing was configured through the single-table methods
    return null;
  }

  final InputTableConfig tableConfig = new InputTableConfig();

  // each optional setting is only copied over when it is actually present
  final List<IteratorSetting> iterators = getIterators(implementingClass, conf);
  if (iterators != null) {
    tableConfig.setIterators(iterators);
  }

  final Set<Pair<Text, Text>> fetchedColumns = getFetchedColumns(implementingClass, conf);
  if (fetchedColumns != null) {
    tableConfig.fetchColumns(fetchedColumns);
  }

  final List<Range> configuredRanges;
  try {
    configuredRanges = getRanges(implementingClass, conf);
  } catch (IOException e) {
    // the signature declares no checked exception, so surface the failure unchecked
    throw new RuntimeException(e);
  }
  if (configuredRanges != null) {
    tableConfig.setRanges(configuredRanges);
  }

  final SamplerConfiguration sampler = getSamplerConfiguration(implementingClass, conf);
  if (sampler != null) {
    tableConfig.setSamplerConfiguration(sampler);
  }

  // the boolean switches always have a value, so they are applied unconditionally
  final boolean autoAdjust = getAutoAdjustRanges(implementingClass, conf);
  final boolean isolated = isIsolated(implementingClass, conf);
  final boolean localIterators = usesLocalIterators(implementingClass, conf);
  final boolean offline = isOfflineScan(implementingClass, conf);
  tableConfig
      .setAutoAdjustRanges(autoAdjust)
      .setUseIsolatedScanners(isolated)
      .setUseLocalIterators(localIterators)
      .setOfflineScan(offline);

  return Maps.immutableEntry(inputTable, tableConfig);
}
Use of org.apache.accumulo.core.client.mapreduce.InputTableConfig in the Apache Accumulo project.
Class AbstractInputFormat, method getSplits.
/**
 * Gets the splits of the tables that have been set on the job by reading the metadata table for the specified ranges.
 *
 * <p>
 * For every configured table the ranges are binned by tablet server and tablet, then turned into splits in one of three ways:
 * <ul>
 * <li>batch scan enabled: one {@code BatchInputSplit} per tablet, holding all ranges clipped to that tablet;</li>
 * <li>auto-adjust enabled (no batch scan): one {@code RangeInputSplit} per (tablet, range) pair, clipped to the tablet;</li>
 * <li>auto-adjust disabled: one {@code RangeInputSplit} per original range, listing every location that serves part of it.</li>
 * </ul>
 *
 * @param job
 *          the job configuration the table configs, credentials and scan options are read from
 * @param numSplits
 *          not read by this implementation
 * @return the splits from the tables based on the ranges.
 * @throws java.io.IOException
 *           if a table set on the job doesn't exist or an error occurs initializing the tablet locator; any exception raised while binning ranges is also
 *           wrapped in an IOException
 * @throws IllegalArgumentException
 *           if the batch scan option is combined with offline scan, isolated scanners, local iterators, or disabled auto-adjust
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
// apply the job's configured log level before doing any work
Level logLevel = getLogLevel(job);
log.setLevel(logLevel);
validateOptions(job);
// used only to jitter the retry sleeps below
Random random = new Random();
LinkedList<InputSplit> splits = new LinkedList<>();
Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(job);
for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
String tableName = tableConfigEntry.getKey();
InputTableConfig tableConfig = tableConfigEntry.getValue();
Instance instance = getInstance(job);
Table.ID tableId;
// resolve table name to id once, and use id from this point forward
if (DeprecationUtil.isMockInstance(instance)) {
// mock instances skip the lookup and use an empty id
tableId = Table.ID.of("");
} else {
try {
tableId = Tables.getTableId(instance, tableName);
} catch (TableNotFoundException e) {
throw new IOException(e);
}
}
Authorizations auths = getScanAuthorizations(job);
String principal = getPrincipal(job);
AuthenticationToken token = getAuthenticationToken(job);
// batch scanning is rejected for offline, isolated, or local-iterator scans
boolean batchScan = InputConfigurator.isBatchScan(CLASS, job);
boolean supportBatchScan = !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
if (batchScan && !supportBatchScan)
throw new IllegalArgumentException("BatchScanner optimization not available for offline scan, isolated, or local iterators");
boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
if (batchScan && !autoAdjust)
throw new IllegalArgumentException("AutoAdjustRanges must be enabled when using BatchScanner optimization");
// overlapping ranges are only merged when auto-adjust is on
List<Range> ranges = autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
if (ranges.isEmpty()) {
// no ranges configured: fall back to a single default-constructed Range
// (presumably this covers the whole table -- confirm against Range's no-arg constructor)
ranges = new ArrayList<>(1);
ranges.add(new Range());
}
// get the metadata information for these ranges
// binnedRanges is keyed by tablet server location, then by tablet extent
Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
TabletLocator tl;
try {
if (tableConfig.isOfflineScan()) {
// a null result from binOfflineTable means "not ready yet", so keep polling
binnedRanges = binOfflineTable(job, tableId, ranges);
while (binnedRanges == null) {
// Some tablets were still online, try again
// sleep randomly between 100 and 200 ms
sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
binnedRanges = binOfflineTable(job, tableId, ranges);
}
} else {
tl = InputConfigurator.getTabletLocator(CLASS, job, tableId);
// its possible that the cache could contain complete, but old information about a tables tablets... so clear it
tl.invalidateCache();
// NOTE(review): instance, principal and token are re-read from the job here although they were already fetched above
ClientContext context = new ClientContext(getInstance(job), new Credentials(getPrincipal(job), getAuthenticationToken(job)), getClientConfiguration(job));
// retry until binRanges returns an empty list (presumably the ranges it could not bin -- confirm against TabletLocator)
while (!tl.binRanges(context, ranges, binnedRanges).isEmpty()) {
if (!DeprecationUtil.isMockInstance(instance)) {
// stop retrying when the table has been deleted or taken offline
String tableIdStr = tableId.canonicalID();
if (!Tables.exists(instance, tableId))
throw new TableDeletedException(tableIdStr);
if (Tables.getTableState(instance, tableId) == TableState.OFFLINE)
throw new TableOfflineException(instance, tableIdStr);
}
// discard the partial result so the next attempt starts from an empty map
binnedRanges.clear();
log.warn("Unable to locate bins for specified ranges. Retrying.");
// sleep randomly between 100 and 200 ms
sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
tl.invalidateCache();
}
}
} catch (Exception e) {
// everything raised while binning, including the table deleted/offline exceptions above, surfaces as IOException
throw new IOException(e);
}
// only used when ranges are not auto-adjusted: original range -> locations serving it
HashMap<Range, ArrayList<String>> splitsToAdd = null;
if (!autoAdjust)
splitsToAdd = new HashMap<>();
// remembers the canonical host name per address so each address is resolved only once per table
HashMap<String, String> hostNameCache = new HashMap<>();
for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
// the key is split on the first ':' and only the part before it is used as the address
String ip = tserverBin.getKey().split(":", 2)[0];
String location = hostNameCache.get(ip);
if (location == null) {
InetAddress inetAddress = InetAddress.getByName(ip);
location = inetAddress.getCanonicalHostName();
hostNameCache.put(ip, location);
}
for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
// the tablet's own range, used to clip the requested ranges to this tablet
Range ke = extentRanges.getKey().toDataRange();
if (batchScan) {
// group ranges by tablet to be read by a BatchScanner
ArrayList<Range> clippedRanges = new ArrayList<>();
for (Range r : extentRanges.getValue()) clippedRanges.add(ke.clip(r));
BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges, new String[] { location });
SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
splits.add(split);
} else {
// not grouping by tablet
for (Range r : extentRanges.getValue()) {
if (autoAdjust) {
// divide ranges into smaller ranges, based on the tablets
RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(), ke.clip(r), new String[] { location });
SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
split.setOffline(tableConfig.isOfflineScan());
split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
splits.add(split);
} else {
// don't divide ranges
// just record this location for the range; the split itself is created after the loop
ArrayList<String> locations = splitsToAdd.get(r);
if (locations == null)
locations = new ArrayList<>(1);
locations.add(location);
splitsToAdd.put(r, locations);
}
}
}
}
}
// emit one split per original (undivided) range, carrying all of its collected locations
if (!autoAdjust)
for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonicalID(), entry.getKey(), entry.getValue().toArray(new String[0]));
SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
split.setOffline(tableConfig.isOfflineScan());
split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
splits.add(split);
}
}
return splits.toArray(new InputSplit[splits.size()]);
}
Use of org.apache.accumulo.core.client.mapreduce.InputTableConfig in the Apache Accumulo project.
Class AccumuloMultiTableInputFormatTest, method testTableQueryConfigSerialization.
/**
 * Checks that {@link org.apache.accumulo.core.client.mapreduce.InputTableConfig} objects stored on a job via
 * {@code AccumuloMultiTableInputFormat.setInputTableConfigs} are read back equal to the originals, for each of two tables.
 */
@Test
public void testTableQueryConfigSerialization() throws IOException {
  // derive two distinct table names from the running test's name
  final String methodName = testName.getMethodName();
  final String firstTableName = methodName + "1";
  final String secondTableName = methodName + "2";

  final JobConf jobConf = new JobConf();

  // both tables get an identically populated, but separately built, config
  final InputTableConfig firstConfig = new InputTableConfig()
      .setRanges(Collections.singletonList(new Range("a", "b")))
      .fetchColumns(Collections.singleton(new Pair<>(new Text("CF1"), new Text("CQ1"))))
      .setIterators(Collections.singletonList(new IteratorSetting(50, "iter1", "iterclass1")));
  final InputTableConfig secondConfig = new InputTableConfig()
      .setRanges(Collections.singletonList(new Range("a", "b")))
      .fetchColumns(Collections.singleton(new Pair<>(new Text("CF1"), new Text("CQ1"))))
      .setIterators(Collections.singletonList(new IteratorSetting(50, "iter1", "iterclass1")));

  final Map<String, InputTableConfig> configsByTable = new HashMap<>();
  configsByTable.put(firstTableName, firstConfig);
  configsByTable.put(secondTableName, secondConfig);

  // store on the job, then read each config back by table name
  AccumuloMultiTableInputFormat.setInputTableConfigs(jobConf, configsByTable);

  assertEquals(firstConfig, AccumuloMultiTableInputFormat.getInputTableConfig(jobConf, firstTableName));
  assertEquals(secondConfig, AccumuloMultiTableInputFormat.getInputTableConfig(jobConf, secondTableName));
}
Aggregations