Use of org.apache.accumulo.core.dataImpl.KeyExtent in project accumulo by apache.
The class BulkImport, method load().
@Override
public void load()
    throws TableNotFoundException, IOException, AccumuloException, AccumuloSecurityException {
  TableId tableId = context.getTableId(tableName);
  FileSystem fs = VolumeConfiguration.fileSystemForPath(dir, context.getHadoopConf());
  Path srcPath = checkPath(fs, dir);
  SortedMap<KeyExtent, Bulk.Files> mappings;
  TableOperationsImpl tableOps = new TableOperationsImpl(context);
  int maxTablets = 0;
  for (var prop : tableOps.getProperties(tableName)) {
    if (prop.getKey().equals(Property.TABLE_BULK_MAX_TABLETS.getKey())) {
      maxTablets = Integer.parseInt(prop.getValue());
      break;
    }
  }
  Retry retry = Retry.builder().infiniteRetries().retryAfter(100, MILLISECONDS)
      .incrementBy(100, MILLISECONDS).maxWait(2, MINUTES).backOffFactor(1.5)
      .logInterval(3, MINUTES).createRetry();
  // retry if a merge occurs
  boolean shouldRetry = true;
  while (shouldRetry) {
    if (plan == null) {
      mappings = computeMappingFromFiles(fs, tableId, srcPath, maxTablets);
    } else {
      mappings = computeMappingFromPlan(fs, tableId, srcPath, maxTablets);
    }
    if (mappings.isEmpty()) {
      if (ignoreEmptyDir) {
        log.info("Attempted to import files from empty directory - {}. Zero files imported",
            srcPath);
        return;
      } else {
        throw new IllegalArgumentException("Attempted to import zero files from " + srcPath);
      }
    }
    BulkSerialize.writeLoadMapping(mappings, srcPath.toString(), fs::create);
    List<ByteBuffer> args = Arrays.asList(ByteBuffer.wrap(tableId.canonical().getBytes(UTF_8)),
        ByteBuffer.wrap(srcPath.toString().getBytes(UTF_8)),
        ByteBuffer.wrap((setTime + "").getBytes(UTF_8)));
    try {
      tableOps.doBulkFateOperation(args, tableName);
      shouldRetry = false;
    } catch (AccumuloBulkMergeException ae) {
      if (plan != null) {
        checkPlanForSplits(ae);
      }
      try {
        retry.waitForNextAttempt();
      } catch (InterruptedException e) {
        throw new RuntimeException(e);
      }
      log.info(ae.getMessage() + ". Retrying bulk import to " + tableName);
    }
  }
}
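For context, a hedged sketch of how this internal load() is typically reached from client code through the public bulk-import builder; the properties file, directory, and table name below are illustrative placeholders, not taken from the source above.

import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;

// Hedged usage sketch: reaching BulkImport.load() via the public API.
public static void bulkImportExample() throws Exception {
  try (AccumuloClient client = Accumulo.newClient().from("client.properties").build()) {
    client.tableOperations().importDirectory("hdfs://nn:8020/tmp/bulk-files")
        .to("mytable").tableTime(true).load();
  }
}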
Use of org.apache.accumulo.core.dataImpl.KeyExtent in project accumulo by apache.
The class BulkImport, method estimateSizes().
public static Map<KeyExtent, Long> estimateSizes(AccumuloConfiguration acuConf, Path mapFile,
    long fileSize, Collection<KeyExtent> extents, FileSystem ns, Cache<String, Long> fileLenCache,
    CryptoService cs) throws IOException {
  if (extents.size() == 1) {
    return Collections.singletonMap(extents.iterator().next(), fileSize);
  }
  long totalIndexEntries = 0;
  Map<KeyExtent, MLong> counts = new TreeMap<>();
  for (KeyExtent keyExtent : extents) {
    counts.put(keyExtent, new MLong(0));
  }
  Text row = new Text();
  FileSKVIterator index = FileOperations.getInstance().newIndexReaderBuilder()
      .forFile(mapFile.toString(), ns, ns.getConf(), cs).withTableConfiguration(acuConf)
      .withFileLenCache(fileLenCache).build();
  try {
    while (index.hasTop()) {
      Key key = index.getTopKey();
      totalIndexEntries++;
      key.getRow(row);
      // TODO this could use a binary search
      for (Entry<KeyExtent, MLong> entry : counts.entrySet()) {
        if (entry.getKey().contains(row)) {
          entry.getValue().l++;
        }
      }
      index.next();
    }
  } finally {
    try {
      if (index != null)
        index.close();
    } catch (IOException e) {
      log.debug("Failed to close " + mapFile, e);
    }
  }
  Map<KeyExtent, Long> results = new TreeMap<>();
  for (KeyExtent keyExtent : extents) {
    double numEntries = counts.get(keyExtent).l;
    if (numEntries == 0)
      numEntries = 1;
    long estSize = (long) ((numEntries / totalIndexEntries) * fileSize);
    results.put(keyExtent, estSize);
  }
  return results;
}
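The estimate is purely proportional: each tablet is credited with its share of the file's index entries, scaled by the file size. A minimal worked example of that formula, with made-up numbers:

// Worked example of the proportional estimate above (numbers are illustrative):
// a 10 MB file whose index holds 100 entries, 25 of which fall in one tablet.
double numEntries = 25;
long totalIndexEntries = 100;
long fileSize = 10_000_000L;
long estSize = (long) ((numEntries / totalIndexEntries) * fileSize); // 2,500,000 bytes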
Use of org.apache.accumulo.core.dataImpl.KeyExtent in project accumulo by apache.
The class BulkImport, method computeFileToTabletMappings().
public SortedMap<KeyExtent, Bulk.Files> computeFileToTabletMappings(FileSystem fs, TableId tableId,
    Path dirPath, Executor executor, ClientContext context, int maxTablets) throws IOException {
  KeyExtentCache extentCache = new ConcurrentKeyExtentCache(tableId, context);
  List<FileStatus> files = filterInvalid(
      fs.listStatus(dirPath, p -> !p.getName().equals(Constants.BULK_LOAD_MAPPING)));
  // we know all of the file lens, so construct a cache and populate it in order to avoid later
  // trips to the namenode
  Cache<String, Long> fileLensCache = getPopulatedFileLenCache(dirPath, files);
  List<CompletableFuture<Map<KeyExtent, Bulk.FileInfo>>> futures = new ArrayList<>();
  CryptoService cs = CryptoServiceFactory.newDefaultInstance();
  for (FileStatus fileStatus : files) {
    Path filePath = fileStatus.getPath();
    CompletableFuture<Map<KeyExtent, Bulk.FileInfo>> future = CompletableFuture.supplyAsync(() -> {
      try {
        long t1 = System.currentTimeMillis();
        List<KeyExtent> extents =
            findOverlappingTablets(context, extentCache, filePath, fs, fileLensCache, cs);
        // make sure file isn't going to too many tablets
        checkTabletCount(maxTablets, extents.size(), filePath.toString());
        Map<KeyExtent, Long> estSizes = estimateSizes(context.getConfiguration(), filePath,
            fileStatus.getLen(), extents, fs, fileLensCache, cs);
        Map<KeyExtent, Bulk.FileInfo> pathLocations = new HashMap<>();
        for (KeyExtent ke : extents) {
          pathLocations.put(ke, new Bulk.FileInfo(filePath, estSizes.getOrDefault(ke, 0L)));
        }
        long t2 = System.currentTimeMillis();
        log.debug("Mapped {} to {} tablets in {}ms", filePath, pathLocations.size(), t2 - t1);
        return pathLocations;
      } catch (Exception e) {
        throw new CompletionException(e);
      }
    }, executor);
    futures.add(future);
  }
  SortedMap<KeyExtent, Bulk.Files> mappings = new TreeMap<>();
  for (CompletableFuture<Map<KeyExtent, Bulk.FileInfo>> future : futures) {
    try {
      Map<KeyExtent, Bulk.FileInfo> pathMapping = future.get();
      pathMapping.forEach((ext, fi) -> mappings.computeIfAbsent(ext, k -> new Files()).add(fi));
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    } catch (ExecutionException e) {
      throw new RuntimeException(e);
    }
  }
  return mergeOverlapping(mappings);
}
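The caller supplies the Executor that runs the per-file mapping tasks; the method joins all futures before returning. A hedged usage sketch, where the pool size and the bulkImport/fs/tableId/dirPath/context/maxTablets names are assumptions for illustration, not from the source above:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// Hedged usage sketch: supplying a thread pool for the per-file mapping work.
ExecutorService executor = Executors.newFixedThreadPool(8);
try {
  SortedMap<KeyExtent, Bulk.Files> mappings =
      bulkImport.computeFileToTabletMappings(fs, tableId, dirPath, executor, context, maxTablets);
  log.debug("Computed mappings for {} tablets", mappings.size());
} finally {
  // safe to shut down here: computeFileToTabletMappings joins all futures before returning
  executor.shutdown();
}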
Use of org.apache.accumulo.core.dataImpl.KeyExtent in project accumulo by apache.
The class BulkImport, method findOverlappingTablets().
public static List<KeyExtent> findOverlappingTablets(KeyExtentCache extentCache,
    FileSKVIterator reader) throws IOException {
  List<KeyExtent> result = new ArrayList<>();
  Collection<ByteSequence> columnFamilies = Collections.emptyList();
  Text row = new Text();
  while (true) {
    reader.seek(new Range(row, null), columnFamilies, false);
    if (!reader.hasTop()) {
      break;
    }
    row = reader.getTopKey().getRow();
    KeyExtent extent = extentCache.lookup(row);
    result.add(extent);
    row = extent.endRow();
    if (row != null) {
      row = nextRow(row);
    } else {
      break;
    }
  }
  return result;
}
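The nextRow helper is referenced above but not shown in this snippet. A plausible sketch, assuming the usual "immediate successor" trick of appending a single zero byte so the next seek starts just past the current tablet's end row; this is an assumption, not the verified implementation:

// Assumed sketch of the nextRow helper (not shown in the snippet above).
private static final byte[] ZERO = {0};

private static Text nextRow(Text row) {
  // The smallest row strictly greater than `row` is `row` + 0x00.
  Text next = new Text(row);
  next.append(ZERO, 0, ZERO.length);
  return next;
}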
Use of org.apache.accumulo.core.dataImpl.KeyExtent in project accumulo by apache.
The class TabletServerBatchReaderIterator, method doLookup().
static void doLookup(ClientContext context, String server, Map<KeyExtent, List<Range>> requested,
    Map<KeyExtent, List<Range>> failures, Map<KeyExtent, List<Range>> unscanned,
    ResultReceiver receiver, List<Column> columns, ScannerOptions options,
    Authorizations authorizations, TimeoutTracker timeoutTracker)
    throws IOException, AccumuloSecurityException, AccumuloServerException {
  if (requested.isEmpty()) {
    return;
  }
  // copy requested to unscanned map. we will remove ranges as they are scanned in trackScanning()
  for (Entry<KeyExtent, List<Range>> entry : requested.entrySet()) {
    ArrayList<Range> ranges = new ArrayList<>();
    for (Range range : entry.getValue()) {
      ranges.add(new Range(range));
    }
    unscanned.put(KeyExtent.copyOf(entry.getKey()), ranges);
  }
  timeoutTracker.startingScan();
  try {
    final HostAndPort parsedServer = HostAndPort.fromString(server);
    final TabletClientService.Client client;
    if (timeoutTracker.getTimeOut() < context.getClientTimeoutInMillis())
      client = ThriftUtil.getTServerClient(parsedServer, context, timeoutTracker.getTimeOut());
    else
      client = ThriftUtil.getTServerClient(parsedServer, context);
    try {
      OpTimer timer = null;
      if (log.isTraceEnabled()) {
        log.trace("tid={} Starting multi scan, tserver={} #tablets={} #ranges={} ssil={} ssio={}",
            Thread.currentThread().getId(), server, requested.size(), sumSizes(requested.values()),
            options.serverSideIteratorList, options.serverSideIteratorOptions);
        timer = new OpTimer().start();
      }
      TabletType ttype = TabletType.type(requested.keySet());
      boolean waitForWrites = !ThriftScanner.serversWaitedForWrites.get(ttype).contains(server);
      // @formatter:off
      Map<TKeyExtent, List<TRange>> thriftTabletRanges = requested.entrySet().stream().collect(
          Collectors.toMap((entry) -> entry.getKey().toThrift(),
              (entry) -> entry.getValue().stream().map(Range::toThrift)
                  .collect(Collectors.toList())));
      // @formatter:on
      Map<String, String> execHints =
          options.executionHints.isEmpty() ? null : options.executionHints;
      InitialMultiScan imsr = client.startMultiScan(TraceUtil.traceInfo(), context.rpcCreds(),
          thriftTabletRanges, columns.stream().map(Column::toThrift).collect(Collectors.toList()),
          options.serverSideIteratorList, options.serverSideIteratorOptions,
          ByteBufferUtil.toByteBuffers(authorizations.getAuthorizations()), waitForWrites,
          SamplerConfigurationImpl.toThrift(options.getSamplerConfiguration()),
          options.batchTimeOut, options.classLoaderContext, execHints);
      if (waitForWrites)
        ThriftScanner.serversWaitedForWrites.get(ttype).add(server.toString());
      MultiScanResult scanResult = imsr.result;
      if (timer != null) {
        timer.stop();
        log.trace("tid={} Got 1st multi scan results, #results={} {} in {}",
            Thread.currentThread().getId(), scanResult.results.size(),
            (scanResult.more ? "scanID=" + imsr.scanID : ""),
            String.format("%.3f secs", timer.scale(SECONDS)));
      }
      ArrayList<Entry<Key, Value>> entries = new ArrayList<>(scanResult.results.size());
      for (TKeyValue kv : scanResult.results) {
        entries.add(new SimpleImmutableEntry<>(new Key(kv.key), new Value(kv.value)));
      }
      if (!entries.isEmpty())
        receiver.receive(entries);
      if (!entries.isEmpty() || !scanResult.fullScans.isEmpty())
        timeoutTracker.madeProgress();
      trackScanning(failures, unscanned, scanResult);
      AtomicLong nextOpid = new AtomicLong();
      while (scanResult.more) {
        timeoutTracker.check();
        if (timer != null) {
          log.trace("tid={} oid={} Continuing multi scan, scanid={}",
              Thread.currentThread().getId(), nextOpid.get(), imsr.scanID);
          timer.reset().start();
        }
        scanResult = client.continueMultiScan(TraceUtil.traceInfo(), imsr.scanID);
        if (timer != null) {
          timer.stop();
          log.trace("tid={} oid={} Got more multi scan results, #results={} {} in {}",
              Thread.currentThread().getId(), nextOpid.getAndIncrement(),
              scanResult.results.size(), (scanResult.more ? " scanID=" + imsr.scanID : ""),
              String.format("%.3f secs", timer.scale(SECONDS)));
        }
        entries = new ArrayList<>(scanResult.results.size());
        for (TKeyValue kv : scanResult.results) {
          entries.add(new SimpleImmutableEntry<>(new Key(kv.key), new Value(kv.value)));
        }
        if (!entries.isEmpty())
          receiver.receive(entries);
        if (!entries.isEmpty() || !scanResult.fullScans.isEmpty())
          timeoutTracker.madeProgress();
        trackScanning(failures, unscanned, scanResult);
      }
      client.closeMultiScan(TraceUtil.traceInfo(), imsr.scanID);
    } finally {
      ThriftUtil.returnClient(client, context);
    }
  } catch (TTransportException e) {
    log.debug("Server : {} msg : {}", server, e.getMessage());
    timeoutTracker.errorOccured();
    throw new IOException(e);
  } catch (ThriftSecurityException e) {
    log.debug("Server : {} msg : {}", server, e.getMessage(), e);
    throw new AccumuloSecurityException(e.user, e.code, e);
  } catch (TApplicationException e) {
    log.debug("Server : {} msg : {}", server, e.getMessage(), e);
    throw new AccumuloServerException(server, e);
  } catch (NoSuchScanIDException e) {
    log.debug("Server : {} msg : {}", server, e.getMessage(), e);
    throw new IOException(e);
  } catch (TSampleNotPresentException e) {
    log.debug("Server : {} msg : {}", server, e.getMessage(), e);
    String tableInfo = "?";
    if (e.getExtent() != null) {
      TableId tableId = KeyExtent.fromThrift(e.getExtent()).tableId();
      tableInfo = context.getPrintableTableInfoFromId(tableId);
    }
    String message = "Table " + tableInfo + " does not have sampling configured or built";
    throw new SampleNotPresentException(message, e);
  } catch (TException e) {
    log.debug("Server : {} msg : {}", server, e.getMessage(), e);
    timeoutTracker.errorOccured();
    throw new IOException(e);
  }
}
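The sumSizes helper used in the trace logging above is not part of this snippet. A minimal sketch, assuming it simply totals the ranges across all requested tablets:

// Assumed sketch of the sumSizes helper (not shown in the snippet above): it
// likely just counts the total number of ranges across all tablets for the log.
private static int sumSizes(Collection<List<Range>> rangeLists) {
  int sum = 0;
  for (List<Range> ranges : rangeLists) {
    sum += ranges.size();
  }
  return sum;
}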