Use of org.apache.accumulo.core.dataImpl.KeyExtent in project accumulo by apache.
The class BulkImport, method computeFileToTabletMappings:
public SortedMap<KeyExtent,Bulk.Files> computeFileToTabletMappings(FileSystem fs, TableId tableId,
    Path dirPath, Executor executor, ClientContext context, int maxTablets) throws IOException {

  KeyExtentCache extentCache = new ConcurrentKeyExtentCache(tableId, context);

  List<FileStatus> files = filterInvalid(
      fs.listStatus(dirPath, p -> !p.getName().equals(Constants.BULK_LOAD_MAPPING)));

  // we know all of the file lens, so construct a cache and populate it in order to avoid later
  // trips to the namenode
  Cache<String,Long> fileLensCache = getPopulatedFileLenCache(dirPath, files);

  List<CompletableFuture<Map<KeyExtent,Bulk.FileInfo>>> futures = new ArrayList<>();

  CryptoService cs = CryptoServiceFactory.newDefaultInstance();

  for (FileStatus fileStatus : files) {
    Path filePath = fileStatus.getPath();
    CompletableFuture<Map<KeyExtent,Bulk.FileInfo>> future = CompletableFuture.supplyAsync(() -> {
      try {
        long t1 = System.currentTimeMillis();
        List<KeyExtent> extents =
            findOverlappingTablets(context, extentCache, filePath, fs, fileLensCache, cs);
        // make sure file isn't going to too many tablets
        checkTabletCount(maxTablets, extents.size(), filePath.toString());
        Map<KeyExtent,Long> estSizes = estimateSizes(context.getConfiguration(), filePath,
            fileStatus.getLen(), extents, fs, fileLensCache, cs);
        Map<KeyExtent,Bulk.FileInfo> pathLocations = new HashMap<>();
        for (KeyExtent ke : extents) {
          pathLocations.put(ke, new Bulk.FileInfo(filePath, estSizes.getOrDefault(ke, 0L)));
        }
        long t2 = System.currentTimeMillis();
        log.debug("Mapped {} to {} tablets in {}ms", filePath, pathLocations.size(), t2 - t1);
        return pathLocations;
      } catch (Exception e) {
        throw new CompletionException(e);
      }
    }, executor);

    futures.add(future);
  }

  SortedMap<KeyExtent,Bulk.Files> mappings = new TreeMap<>();

  for (CompletableFuture<Map<KeyExtent,Bulk.FileInfo>> future : futures) {
    try {
      Map<KeyExtent,Bulk.FileInfo> pathMapping = future.get();
      pathMapping.forEach((ext, fi) -> mappings.computeIfAbsent(ext, k -> new Files()).add(fi));
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    } catch (ExecutionException e) {
      throw new RuntimeException(e);
    }
  }

  return mergeOverlapping(mappings);
}
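For orientation, a caller could drive this method roughly as follows. This is a minimal sketch, not code from the Accumulo source: the bulkImport instance, the pool size, the input path, and the maxTablets limit of 100 are all illustrative assumptions.

// Hypothetical driver; bulkImport, fs, tableId, and context are assumed to be
// initialized elsewhere. Any Executor works; a fixed pool is used here.
void mapBulkFiles(BulkImport bulkImport, FileSystem fs, TableId tableId, ClientContext context)
    throws IOException {
  ExecutorService executor = Executors.newFixedThreadPool(8);
  try {
    SortedMap<KeyExtent,Bulk.Files> mappings = bulkImport.computeFileToTabletMappings(
        fs, tableId, new Path("/tmp/bulk-input"), executor, context, 100);
    // each entry maps a tablet extent to the bulk files that overlap it
    mappings.forEach((extent, bulkFiles) -> System.out.println(extent + " <- " + bulkFiles));
  } finally {
    executor.shutdown();
  }
}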
Use of org.apache.accumulo.core.dataImpl.KeyExtent in project accumulo by apache.
The class BulkImport, method findOverlappingTablets:
public static List<KeyExtent> findOverlappingTablets(KeyExtentCache extentCache,
    FileSKVIterator reader) throws IOException {

  List<KeyExtent> result = new ArrayList<>();
  Collection<ByteSequence> columnFamilies = Collections.emptyList();
  Text row = new Text();

  while (true) {
    // seek to the first key at or after the current row
    reader.seek(new Range(row, null), columnFamilies, false);

    if (!reader.hasTop()) {
      break;
    }

    row = reader.getTopKey().getRow();
    KeyExtent extent = extentCache.lookup(row);
    result.add(extent);

    row = extent.endRow();
    if (row != null) {
      // resume the search just past the end row of the tablet that was found
      row = nextRow(row);
    } else {
      break;
    }
  }

  return result;
}
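The loop above relies on a nextRow helper that is not shown. A minimal sketch consistent with how it is used, producing the immediate successor of a row so the next seek starts strictly past the current tablet's end row, might look like this; the actual helper in the Accumulo source may differ in detail.

private static final byte[] ZERO = {0};

// Appending a single zero byte yields the smallest row that sorts strictly
// after the given row, so the next seek cannot revisit the same tablet.
private static Text nextRow(Text row) {
  Text next = new Text(row);
  next.append(ZERO, 0, ZERO.length);
  return next;
}

Because each iteration advances past the end row of the tablet just found, the loop visits each overlapping tablet exactly once.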
Use of org.apache.accumulo.core.dataImpl.KeyExtent in project accumulo by apache.
The class TabletServerBatchReaderIterator, method doLookup:
static void doLookup(ClientContext context, String server,
    Map<KeyExtent,List<Range>> requested, Map<KeyExtent,List<Range>> failures,
    Map<KeyExtent,List<Range>> unscanned, ResultReceiver receiver, List<Column> columns,
    ScannerOptions options, Authorizations authorizations, TimeoutTracker timeoutTracker)
    throws IOException, AccumuloSecurityException, AccumuloServerException {

  if (requested.isEmpty()) {
    return;
  }

  // copy requested to unscanned map. we will remove ranges as they are scanned in trackScanning()
  for (Entry<KeyExtent,List<Range>> entry : requested.entrySet()) {
    ArrayList<Range> ranges = new ArrayList<>();
    for (Range range : entry.getValue()) {
      ranges.add(new Range(range));
    }
    unscanned.put(KeyExtent.copyOf(entry.getKey()), ranges);
  }

  timeoutTracker.startingScan();
  try {
    final HostAndPort parsedServer = HostAndPort.fromString(server);
    final TabletClientService.Client client;
    if (timeoutTracker.getTimeOut() < context.getClientTimeoutInMillis())
      client = ThriftUtil.getTServerClient(parsedServer, context, timeoutTracker.getTimeOut());
    else
      client = ThriftUtil.getTServerClient(parsedServer, context);

    try {
      OpTimer timer = null;

      if (log.isTraceEnabled()) {
        log.trace("tid={} Starting multi scan, tserver={} #tablets={} #ranges={} ssil={} ssio={}",
            Thread.currentThread().getId(), server, requested.size(),
            sumSizes(requested.values()), options.serverSideIteratorList,
            options.serverSideIteratorOptions);
        timer = new OpTimer().start();
      }

      TabletType ttype = TabletType.type(requested.keySet());
      boolean waitForWrites = !ThriftScanner.serversWaitedForWrites.get(ttype).contains(server);

      // @formatter:off
      Map<TKeyExtent,List<TRange>> thriftTabletRanges = requested.entrySet().stream().collect(Collectors.toMap(
              entry -> entry.getKey().toThrift(),
              entry -> entry.getValue().stream().map(Range::toThrift).collect(Collectors.toList())));
      // @formatter:on

      Map<String,String> execHints =
          options.executionHints.isEmpty() ? null : options.executionHints;

      InitialMultiScan imsr = client.startMultiScan(TraceUtil.traceInfo(), context.rpcCreds(),
          thriftTabletRanges, columns.stream().map(Column::toThrift).collect(Collectors.toList()),
          options.serverSideIteratorList, options.serverSideIteratorOptions,
          ByteBufferUtil.toByteBuffers(authorizations.getAuthorizations()), waitForWrites,
          SamplerConfigurationImpl.toThrift(options.getSamplerConfiguration()),
          options.batchTimeOut, options.classLoaderContext, execHints);
      if (waitForWrites)
        ThriftScanner.serversWaitedForWrites.get(ttype).add(server);

      MultiScanResult scanResult = imsr.result;

      if (timer != null) {
        timer.stop();
        log.trace("tid={} Got 1st multi scan results, #results={} {} in {}",
            Thread.currentThread().getId(), scanResult.results.size(),
            (scanResult.more ? "scanID=" + imsr.scanID : ""),
            String.format("%.3f secs", timer.scale(SECONDS)));
      }

      ArrayList<Entry<Key,Value>> entries = new ArrayList<>(scanResult.results.size());
      for (TKeyValue kv : scanResult.results) {
        entries.add(new SimpleImmutableEntry<>(new Key(kv.key), new Value(kv.value)));
      }

      if (!entries.isEmpty())
        receiver.receive(entries);

      if (!entries.isEmpty() || !scanResult.fullScans.isEmpty())
        timeoutTracker.madeProgress();

      trackScanning(failures, unscanned, scanResult);

      AtomicLong nextOpid = new AtomicLong();

      while (scanResult.more) {
        timeoutTracker.check();

        if (timer != null) {
          log.trace("tid={} oid={} Continuing multi scan, scanid={}",
              Thread.currentThread().getId(), nextOpid.get(), imsr.scanID);
          timer.reset().start();
        }

        scanResult = client.continueMultiScan(TraceUtil.traceInfo(), imsr.scanID);

        if (timer != null) {
          timer.stop();
          log.trace("tid={} oid={} Got more multi scan results, #results={} {} in {}",
              Thread.currentThread().getId(), nextOpid.getAndIncrement(),
              scanResult.results.size(), (scanResult.more ? " scanID=" + imsr.scanID : ""),
              String.format("%.3f secs", timer.scale(SECONDS)));
        }

        entries = new ArrayList<>(scanResult.results.size());
        for (TKeyValue kv : scanResult.results) {
          entries.add(new SimpleImmutableEntry<>(new Key(kv.key), new Value(kv.value)));
        }

        if (!entries.isEmpty())
          receiver.receive(entries);

        if (!entries.isEmpty() || !scanResult.fullScans.isEmpty())
          timeoutTracker.madeProgress();

        trackScanning(failures, unscanned, scanResult);
      }

      client.closeMultiScan(TraceUtil.traceInfo(), imsr.scanID);
    } finally {
      ThriftUtil.returnClient(client, context);
    }
  } catch (TTransportException e) {
    log.debug("Server : {} msg : {}", server, e.getMessage());
    timeoutTracker.errorOccured();
    throw new IOException(e);
  } catch (ThriftSecurityException e) {
    log.debug("Server : {} msg : {}", server, e.getMessage(), e);
    throw new AccumuloSecurityException(e.user, e.code, e);
  } catch (TApplicationException e) {
    log.debug("Server : {} msg : {}", server, e.getMessage(), e);
    throw new AccumuloServerException(server, e);
  } catch (NoSuchScanIDException e) {
    log.debug("Server : {} msg : {}", server, e.getMessage(), e);
    throw new IOException(e);
  } catch (TSampleNotPresentException e) {
    log.debug("Server : {} msg : {}", server, e.getMessage(), e);
    String tableInfo = "?";
    if (e.getExtent() != null) {
      TableId tableId = KeyExtent.fromThrift(e.getExtent()).tableId();
      tableInfo = context.getPrintableTableInfoFromId(tableId);
    }
    String message = "Table " + tableInfo + " does not have sampling configured or built";
    throw new SampleNotPresentException(message, e);
  } catch (TException e) {
    log.debug("Server : {} msg : {}", server, e.getMessage(), e);
    timeoutTracker.errorOccured();
    throw new IOException(e);
  }
}
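doLookup exercises the TimeoutTracker through a small contract: startingScan() before the first RPC, madeProgress() whenever results arrive, check() before each continuation, and errorOccured() on transport failures. The real tracker is a private inner class of TabletServerBatchReaderIterator; the following is only an illustrative sketch of a tracker honoring that contract, with assumed field names and failure-handling policy.

// Illustrative only; field names and the failure policy are assumptions.
static class TimeoutTracker {
  private final String server;
  private final long timeOut;  // milliseconds allowed without progress
  private long activityTime;   // last time progress was observed

  TimeoutTracker(String server, long timeOut) {
    this.server = server;
    this.timeOut = timeOut;
  }

  void startingScan() {
    activityTime = System.currentTimeMillis();
  }

  void madeProgress() {
    activityTime = System.currentTimeMillis();
  }

  void check() {
    // TimedOutException is unchecked, so callers like doLookup need no throws clause
    if (System.currentTimeMillis() - activityTime > timeOut)
      throw new TimedOutException(Collections.singleton(server));
  }

  void errorOccured() {
    // a real tracker would record the failure so a persistently failing
    // server can eventually be reported as timed out
  }

  long getTimeOut() {
    return timeOut;
  }
}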
Use of org.apache.accumulo.core.dataImpl.KeyExtent in project accumulo by apache.
The class TabletServerBatchReaderIterator, method doLookups:
private void doLookups(Map<String,Map<KeyExtent,List<Range>>> binnedRanges,
    final ResultReceiver receiver, List<Column> columns) {

  if (timedoutServers.containsAll(binnedRanges.keySet())) {
    // all servers have timed out
    throw new TimedOutException(timedoutServers);
  }

  // when there are many threads and only a few tablet servers, it is good to break
  // requests to tablet servers up; the following code determines if this is the case
  int maxTabletsPerRequest = Integer.MAX_VALUE;
  if (numThreads / binnedRanges.size() > 1) {
    int totalNumberOfTablets = 0;
    for (Entry<String,Map<KeyExtent,List<Range>>> entry : binnedRanges.entrySet()) {
      totalNumberOfTablets += entry.getValue().size();
    }

    maxTabletsPerRequest = totalNumberOfTablets / numThreads;
    if (maxTabletsPerRequest == 0) {
      maxTabletsPerRequest = 1;
    }
  }

  Map<KeyExtent,List<Range>> failures = new HashMap<>();

  if (!timedoutServers.isEmpty()) {
    // go ahead and fail any timed out servers
    for (Iterator<Entry<String,Map<KeyExtent,List<Range>>>> iterator =
        binnedRanges.entrySet().iterator(); iterator.hasNext();) {
      Entry<String,Map<KeyExtent,List<Range>>> entry = iterator.next();
      if (timedoutServers.contains(entry.getKey())) {
        failures.putAll(entry.getValue());
        iterator.remove();
      }
    }
  }

  // randomize tabletserver order... this will help when there are multiple
  // batch readers and writers running against accumulo
  List<String> locations = new ArrayList<>(binnedRanges.keySet());
  Collections.shuffle(locations);

  List<QueryTask> queryTasks = new ArrayList<>();

  for (final String tsLocation : locations) {

    final Map<KeyExtent,List<Range>> tabletsRanges = binnedRanges.get(tsLocation);
    if (maxTabletsPerRequest == Integer.MAX_VALUE || tabletsRanges.size() == 1) {
      QueryTask queryTask = new QueryTask(tsLocation, tabletsRanges, failures, receiver, columns);
      queryTasks.add(queryTask);
    } else {
      HashMap<KeyExtent,List<Range>> tabletSubset = new HashMap<>();
      for (Entry<KeyExtent,List<Range>> entry : tabletsRanges.entrySet()) {
        tabletSubset.put(entry.getKey(), entry.getValue());
        if (tabletSubset.size() >= maxTabletsPerRequest) {
          QueryTask queryTask =
              new QueryTask(tsLocation, tabletSubset, failures, receiver, columns);
          queryTasks.add(queryTask);
          tabletSubset = new HashMap<>();
        }
      }

      if (!tabletSubset.isEmpty()) {
        QueryTask queryTask = new QueryTask(tsLocation, tabletSubset, failures, receiver, columns);
        queryTasks.add(queryTask);
      }
    }
  }

  final Semaphore semaphore = new Semaphore(queryTasks.size());
  semaphore.acquireUninterruptibly(queryTasks.size());

  for (QueryTask queryTask : queryTasks) {
    queryTask.setSemaphore(semaphore, queryTasks.size());
    queryThreadPool.execute(queryTask);
  }
}
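The semaphore at the end acts as a completion latch: every permit is acquired up front, each QueryTask releases one when it finishes (via the count passed to setSemaphore), and whoever later acquires the full count blocks until all tasks are done. The same pattern in isolation, with plain Runnables standing in for QueryTask and queryThreadPool, might look like this; it is a sketch, not the Accumulo code.

// Stand-alone illustration of the completion-latch pattern.
static void runAndWait(ExecutorService pool, List<Runnable> tasks) {
  Semaphore semaphore = new Semaphore(tasks.size());
  semaphore.acquireUninterruptibly(tasks.size()); // drain every permit up front
  for (Runnable task : tasks) {
    pool.execute(() -> {
      try {
        task.run();
      } finally {
        semaphore.release(); // each finished task returns one permit
      }
    });
  }
  // succeeds only after all tasks have released their permits
  semaphore.acquireUninterruptibly(tasks.size());
}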
Use of org.apache.accumulo.core.dataImpl.KeyExtent in project accumulo by apache.
The class ConcurrentKeyExtentCache, method lookup:
@Override
public KeyExtent lookup(Text row) {
  while (true) {
    KeyExtent ke = getFromCache(row);
    if (ke != null)
      return ke;

    // If a metadata lookup is currently in progress, then multiple threads can queue up their
    // rows. The next lookup will process all queued rows. Processing multiple rows at once can
    // be more efficient.
    rowsToLookup.add(row);
    synchronized (this) {
      // This check is done to avoid processing rowsToLookup when the current thread's row is in
      // the cache.
      ke = getFromCache(row);
      if (ke != null) {
        rowsToLookup.remove(row);
        return ke;
      }

      lookupRows.clear();

      synchronized (rowsToLookup) {
        // Gather all rows that were queued for lookup before this point in time.
        rowsToLookup.forEach(lookupRows::add);
        rowsToLookup.clear();
      }

      // Look up rows in the metadata table in sorted order. This could possibly lead to fewer
      // metadata lookups.
      lookupRows.sort(Text::compareTo);

      for (Text lookupRow : lookupRows) {
        if (getFromCache(lookupRow) == null) {
          Iterator<KeyExtent> iter = lookupExtents(lookupRow).iterator();
          while (iter.hasNext()) {
            KeyExtent ke2 = iter.next();
            if (inCache(ke2))
              break;
            updateCache(ke2);
          }
        }
      }
    }
  }
}
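The loop depends on getFromCache, inCache, and updateCache, which are not shown. A plausible sketch of those primitives, assuming the cache is a concurrent sorted map keyed by tablet end row, is below; the real class may differ, for example in how it represents the last tablet's null end row (a complete implementation must cache that tablet too, e.g. under a sentinel key).

// Assumed representation: tablets indexed by end row in a concurrent sorted map.
private final ConcurrentSkipListMap<Text,KeyExtent> extents = new ConcurrentSkipListMap<>();

private KeyExtent getFromCache(Text row) {
  // if the tablet containing row is cached, it is the entry with the
  // smallest end row that is >= row
  Entry<Text,KeyExtent> entry = extents.ceilingEntry(row);
  if (entry != null && entry.getValue().contains(row)) {
    return entry.getValue();
  }
  return null;
}

private boolean inCache(KeyExtent ke) {
  return ke.endRow() != null && ke.equals(extents.get(ke.endRow()));
}

private void updateCache(KeyExtent ke) {
  if (ke.endRow() != null) {
    extents.put(ke.endRow(), ke);
  }
}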