use of org.apache.accumulo.core.clientImpl.ClientContext in project accumulo by apache.
the class AbstractInputFormat method getSplits.
/**
* Gets the splits of the tables that have been set on the job by reading the metadata table for
* the specified ranges.
*
* @return the splits from the tables based on the ranges.
* @throws java.io.IOException
* if a table set on the job doesn't exist or an error occurs initializing the tablet
* locator
*/
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  Level logLevel = getLogLevel(job);
  log.setLevel(logLevel);
  validateOptions(job);
  LinkedList<InputSplit> splits = new LinkedList<>();
  Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(job);
  for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
    String tableName = tableConfigEntry.getKey();
    InputTableConfig tableConfig = tableConfigEntry.getValue();
    ClientContext client;
    try {
      client = InputConfigurator.client(CLASS, job.getConfiguration());
    } catch (AccumuloException | AccumuloSecurityException e) {
      throw new IOException(e);
    }
    TableId tableId;
    // resolve the table name to an id once, and use the id from this point forward
    try {
      tableId = client.getTableId(tableName);
    } catch (TableNotFoundException e) {
      throw new IOException(e);
    }
    boolean batchScan = InputConfigurator.isBatchScan(CLASS, job.getConfiguration());
    boolean supportBatchScan = !(tableConfig.isOfflineScan()
        || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators());
    if (batchScan && !supportBatchScan)
      throw new IllegalArgumentException(
          "BatchScanner optimization not available for offline scan, isolated, or local iterators");
    boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
    if (batchScan && !autoAdjust)
      throw new IllegalArgumentException(
          "AutoAdjustRanges must be enabled when using BatchScanner optimization");
    List<Range> ranges =
        autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
    if (ranges.isEmpty()) {
      ranges = new ArrayList<>(1);
      ranges.add(new Range());
    }
    // get the metadata information for these ranges
    Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<>();
    TabletLocator tl;
    try {
      if (tableConfig.isOfflineScan()) {
        binnedRanges = binOfflineTable(job, tableId, ranges);
        while (binnedRanges == null) {
          // some tablets were still online; sleep randomly between 100 and 200 ms, then try again
          sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
          binnedRanges = binOfflineTable(job, tableId, ranges);
        }
      } else {
        tl = TabletLocator.getLocator(client, tableId);
        // it's possible that the cache contains complete but stale information about a
        // table's tablets, so clear it
        tl.invalidateCache();
        while (!tl.binRanges(client, ranges, binnedRanges).isEmpty()) {
          client.requireNotDeleted(tableId);
          client.requireNotOffline(tableId, tableName);
          binnedRanges.clear();
          log.warn("Unable to locate bins for specified ranges. Retrying.");
          // sleep randomly between 100 and 200 ms
          sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
          tl.invalidateCache();
        }
      }
    } catch (Exception e) {
      throw new IOException(e);
    }
    // the code below either adds one split per range-location pair, or splits ranges along
    // tablet boundaries and adds one split per clipped range
    // map from range to list of locations; only used when ranges are not auto-adjusted
    HashMap<Range, ArrayList<String>> splitsToAdd = null;
    if (!autoAdjust)
      splitsToAdd = new HashMap<>();
    HashMap<String, String> hostNameCache = new HashMap<>();
    for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
      String ip = tserverBin.getKey().split(":", 2)[0];
      String location = hostNameCache.get(ip);
      if (location == null) {
        InetAddress inetAddress = InetAddress.getByName(ip);
        location = inetAddress.getCanonicalHostName();
        hostNameCache.put(ip, location);
      }
      for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
        Range ke = extentRanges.getKey().toDataRange();
        if (batchScan) {
          // group ranges by tablet to be read by a BatchScanner
          ArrayList<Range> clippedRanges = new ArrayList<>();
          for (Range r : extentRanges.getValue())
            clippedRanges.add(ke.clip(r));
          org.apache.accumulo.core.clientImpl.mapreduce.BatchInputSplit split =
              new org.apache.accumulo.core.clientImpl.mapreduce.BatchInputSplit(tableName, tableId,
                  clippedRanges, new String[] {location});
          org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig,
              logLevel);
          splits.add(split);
        } else {
          // not grouping by tablet
          for (Range r : extentRanges.getValue()) {
            if (autoAdjust) {
              // divide ranges into smaller ranges, based on the tablets
              RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(),
                  ke.clip(r), new String[] {location});
              org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split,
                  tableConfig, logLevel);
              split.setOffline(tableConfig.isOfflineScan());
              split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
              split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
              splits.add(split);
            } else {
              // don't divide ranges
              ArrayList<String> locations = splitsToAdd.get(r);
              if (locations == null)
                locations = new ArrayList<>(1);
              locations.add(location);
              splitsToAdd.put(r, locations);
            }
          }
        }
      }
    }
    if (!autoAdjust)
      for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
        RangeInputSplit split = new RangeInputSplit(tableName, tableId.canonical(), entry.getKey(),
            entry.getValue().toArray(new String[0]));
        org.apache.accumulo.core.clientImpl.mapreduce.SplitUtils.updateSplit(split, tableConfig,
            logLevel);
        split.setOffline(tableConfig.isOfflineScan());
        split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
        split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());
        splits.add(split);
      }
  }
  return splits;
}
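For context, here is a minimal, hypothetical sketch of how a job is typically configured before getSplits runs, using the deprecated org.apache.accumulo.core.client.mapreduce API that this class backs. The instance name, ZooKeeper hosts, credentials, table name, and range are all placeholders.

import java.util.Collections;
import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetSplitsSetupExample {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    AccumuloInputFormat.setZooKeeperInstance(job,
        ClientConfiguration.create().withInstance("myInstance").withZkHosts("zk1:2181"));
    AccumuloInputFormat.setConnectorInfo(job, "mapreduce_user", new PasswordToken("secret"));
    AccumuloInputFormat.setInputTableName(job, "mytable");
    // limit the scan to one range; with auto-adjust on (the default), getSplits will
    // clip this range to tablet boundaries and emit one split per tablet/location pair
    AccumuloInputFormat.setRanges(job, Collections.singleton(new Range("a", "m")));
  }
}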
use of org.apache.accumulo.core.clientImpl.ClientContext in project accumulo by apache.
the class AbstractInputFormat method setConnectorInfo.
/**
* Sets the connector information needed to communicate with Accumulo in this job.
*
* <p>
* <b>WARNING:</b> Some tokens, when serialized, divulge sensitive information in the
* configuration as a means to pass the token to MapReduce tasks. This information is BASE64
* encoded to provide a charset safe conversion to a string, but this conversion is not intended
* to be secure. {@link PasswordToken} is one example that is insecure in this way; however
* {@link DelegationToken}s, acquired using
* {@link SecurityOperations#getDelegationToken(DelegationTokenConfig)}, are not subject to this
* concern.
*
* @param job
* the Hadoop job instance to be configured
* @param principal
* a valid Accumulo user name (user must have Table.CREATE permission)
* @param token
* the user's authentication token (for example, a {@link PasswordToken})
* @since 1.5.0
*/
public static void setConnectorInfo(Job job, String principal, AuthenticationToken token)
    throws AccumuloSecurityException {
  if (token instanceof KerberosToken) {
    log.info("Received KerberosToken, attempting to fetch DelegationToken");
    try {
      ClientContext client = InputConfigurator.client(CLASS, job.getConfiguration());
      token = client.securityOperations().getDelegationToken(new DelegationTokenConfig());
    } catch (Exception e) {
      log.warn("Failed to automatically obtain DelegationToken,"
          + " Mappers/Reducers will likely fail to communicate with Accumulo", e);
    }
  }
  // DelegationTokens can be passed securely from user to task without serializing insecurely in
  // the configuration
  if (token instanceof DelegationTokenImpl) {
    DelegationTokenImpl delegationToken = (DelegationTokenImpl) token;
    // convert it into a Hadoop Token
    AuthenticationTokenIdentifier identifier = delegationToken.getIdentifier();
    Token<AuthenticationTokenIdentifier> hadoopToken = new Token<>(identifier.getBytes(),
        delegationToken.getPassword(), identifier.getKind(), delegationToken.getServiceName());
    // add the Hadoop Token to the Job so it gets serialized and passed along
    job.getCredentials().addToken(hadoopToken.getService(), hadoopToken);
  }
  InputConfigurator.setConnectorInfo(CLASS, job.getConfiguration(), principal, token);
}
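A minimal sketch of calling this method, with a placeholder user and credentials. A PasswordToken ends up BASE64-encoded in the job configuration, while the Kerberos path above swaps in a DelegationToken that travels in the job's credentials.

import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class ConnectorInfoExample {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    // a PasswordToken is BASE64-encoded into the job configuration: charset-safe, not secure
    AccumuloInputFormat.setConnectorInfo(job, "mapreduce_user", new PasswordToken("not-a-secret"));
    // with a KerberosToken, the method obtains a DelegationToken that is stored in the
    // job's credentials instead (requires a live Kerberos login; connector info can only
    // be set once per job, hence commented out):
    // AccumuloInputFormat.setConnectorInfo(job, "user@EXAMPLE.COM", new KerberosToken());
  }
}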
use of org.apache.accumulo.core.clientImpl.ClientContext in project accumulo by apache.
the class ZooKeeperInstance method getConnector.
@Override
public Connector getConnector(String principal, AuthenticationToken token)
    throws AccumuloException, AccumuloSecurityException {
  Properties properties = ClientConfConverter.toProperties(clientConf);
  properties.setProperty(ClientProperty.AUTH_PRINCIPAL.getKey(), principal);
  properties.setProperty(ClientProperty.INSTANCE_NAME.getKey(), getInstanceName());
  ClientInfo info = new ClientInfoImpl(properties, token);
  AccumuloConfiguration serverConf = ClientConfConverter.toAccumuloConf(properties);
  return new org.apache.accumulo.core.clientImpl.ConnectorImpl(
      new ClientContext(SingletonReservation.noop(), info, serverConf));
}
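A brief usage sketch of this legacy entry point, with placeholder instance name, hosts, credentials, and table; getConnector wraps the 2.0 ClientContext in a ConnectorImpl so pre-2.0 client code keeps working.

import java.util.Map.Entry;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.ZooKeeperInstance;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;

public class GetConnectorExample {
  public static void main(String[] args) throws Exception {
    Instance instance = new ZooKeeperInstance("myInstance", "zk1:2181,zk2:2181");
    Connector conn = instance.getConnector("root", new PasswordToken("secret"));
    try (Scanner scanner = conn.createScanner("mytable", Authorizations.EMPTY)) {
      for (Entry<Key, Value> e : scanner) {
        System.out.println(e.getKey() + " -> " + e.getValue());
      }
    }
  }
}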
use of org.apache.accumulo.core.clientImpl.ClientContext in project accumulo by apache.
the class BulkImport method computeFileToTabletMappings.
public SortedMap<KeyExtent, Bulk.Files> computeFileToTabletMappings(FileSystem fs, TableId tableId,
    Path dirPath, Executor executor, ClientContext context, int maxTablets) throws IOException {
  KeyExtentCache extentCache = new ConcurrentKeyExtentCache(tableId, context);
  List<FileStatus> files = filterInvalid(
      fs.listStatus(dirPath, p -> !p.getName().equals(Constants.BULK_LOAD_MAPPING)));
  // we know all of the file lengths, so construct a cache and populate it up front to avoid
  // later trips to the namenode
  Cache<String, Long> fileLensCache = getPopulatedFileLenCache(dirPath, files);
  List<CompletableFuture<Map<KeyExtent, Bulk.FileInfo>>> futures = new ArrayList<>();
  CryptoService cs = CryptoServiceFactory.newDefaultInstance();
  for (FileStatus fileStatus : files) {
    Path filePath = fileStatus.getPath();
    CompletableFuture<Map<KeyExtent, Bulk.FileInfo>> future = CompletableFuture.supplyAsync(() -> {
      try {
        long t1 = System.currentTimeMillis();
        List<KeyExtent> extents =
            findOverlappingTablets(context, extentCache, filePath, fs, fileLensCache, cs);
        // make sure the file isn't going to too many tablets
        checkTabletCount(maxTablets, extents.size(), filePath.toString());
        Map<KeyExtent, Long> estSizes = estimateSizes(context.getConfiguration(), filePath,
            fileStatus.getLen(), extents, fs, fileLensCache, cs);
        Map<KeyExtent, Bulk.FileInfo> pathLocations = new HashMap<>();
        for (KeyExtent ke : extents) {
          pathLocations.put(ke, new Bulk.FileInfo(filePath, estSizes.getOrDefault(ke, 0L)));
        }
        long t2 = System.currentTimeMillis();
        log.debug("Mapped {} to {} tablets in {}ms", filePath, pathLocations.size(), t2 - t1);
        return pathLocations;
      } catch (Exception e) {
        throw new CompletionException(e);
      }
    }, executor);
    futures.add(future);
  }
  SortedMap<KeyExtent, Bulk.Files> mappings = new TreeMap<>();
  for (CompletableFuture<Map<KeyExtent, Bulk.FileInfo>> future : futures) {
    try {
      Map<KeyExtent, Bulk.FileInfo> pathMapping = future.get();
      pathMapping.forEach((ext, fi) -> mappings.computeIfAbsent(ext, k -> new Files()).add(fi));
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    } catch (ExecutionException e) {
      throw new RuntimeException(e);
    }
  }
  return mergeOverlapping(mappings);
}
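This method is internal plumbing; callers normally reach it through the public bulk import API, as in this sketch. The connection details, directory path, and table name are placeholders.

import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;

public class BulkImportExample {
  public static void main(String[] args) throws Exception {
    try (AccumuloClient client = Accumulo.newClient()
        .to("myInstance", "zk1:2181").as("root", "secret").build()) {
      // importDirectory drives computeFileToTabletMappings internally to work out
      // which tablets each RFile in the directory overlaps before assigning loads
      client.tableOperations().importDirectory("hdfs://namenode:8020/bulk/dir")
          .to("mytable").load();
    }
  }
}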
use of org.apache.accumulo.core.clientImpl.ClientContext in project accumulo by apache.
the class MetadataLocationObtainer method lookupTablets.
@Override
public List<TabletLocation> lookupTablets(ClientContext context, String tserver,
    Map<KeyExtent, List<Range>> tabletsRanges, TabletLocator parent)
    throws AccumuloSecurityException, AccumuloException {
  final TreeMap<Key, Value> results = new TreeMap<>();
  ResultReceiver rr = entries -> {
    for (Entry<Key, Value> entry : entries) {
      try {
        // each metadata row arrives packed into a single key/value pair; unpack it
        results.putAll(WholeRowIterator.decodeRow(entry.getKey(), entry.getValue()));
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  };
  ScannerOptions opts = null;
  try (SettableScannerOptions unsetOpts = new SettableScannerOptions()) {
    opts = unsetOpts.setColumns(locCols);
  }
  Map<KeyExtent, List<Range>> unscanned = new HashMap<>();
  Map<KeyExtent, List<Range>> failures = new HashMap<>();
  try {
    TabletServerBatchReaderIterator.doLookup(context, tserver, tabletsRanges, failures, unscanned,
        rr, columns, opts, Authorizations.EMPTY);
    if (!failures.isEmpty()) {
      // invalidate extents in the parent's cache
      if (log.isTraceEnabled())
        log.trace("lookupTablets failed for {} extents", failures.size());
      parent.invalidateCache(failures.keySet());
    }
  } catch (IOException e) {
    log.trace("lookupTablets failed server={}", tserver, e);
    parent.invalidateCache(context, tserver);
  } catch (AccumuloServerException e) {
    log.trace("lookupTablets failed server={}", tserver, e);
    throw e;
  }
  return MetadataLocationObtainer.getMetadataLocationEntries(results).getLocations();
}
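The ResultReceiver above relies on WholeRowIterator.decodeRow to reassemble metadata rows that the server packed into single key/value pairs. Here is a small sketch of the same pattern against an ordinary table; the connection, table name, and iterator priority are placeholders.

import java.util.Map.Entry;
import java.util.SortedMap;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.WholeRowIterator;
import org.apache.accumulo.core.security.Authorizations;

public class WholeRowExample {
  static void scanWholeRows(Connector conn) throws Exception {
    try (Scanner scanner = conn.createScanner("mytable", Authorizations.EMPTY)) {
      // server side: WholeRowIterator packs each row into a single key/value pair
      scanner.addScanIterator(new IteratorSetting(50, WholeRowIterator.class));
      for (Entry<Key, Value> rowEntry : scanner) {
        // client side: decodeRow unpacks the row into its individual entries, just as
        // the ResultReceiver above does for metadata rows
        SortedMap<Key, Value> row =
            WholeRowIterator.decodeRow(rowEntry.getKey(), rowEntry.getValue());
        System.out.println("row has " + row.size() + " entries");
      }
    }
  }
}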