Search in sources :

Example 1 with Destination

use of org.apache.accumulo.core.data.LoadPlan.Destination in project accumulo by apache.

the class BulkImport method computeMappingFromPlan.

/**
 * Builds the extent-to-files mapping for a bulk import driven by the load plan held in
 * {@code plan}.
 *
 * <p>The plan's destinations are grouped by file name and compared against the files listed in
 * {@code srcPath} (the bulk load mapping file is excluded from the listing and the listing is
 * passed through {@code filterInvalid}). Each file's destinations are then resolved to a set of
 * extents, and the file is recorded against every one of those extents.
 *
 * @param fs file system used to list {@code srcPath}
 * @param tableId id of the table, used for the extent cache and for resolving destinations
 * @param srcPath directory containing the files to import
 * @param maxTablets limit handed to {@code checkTabletCount} for every file
 * @return the result of {@code mergeOverlapping} applied to the computed mapping
 * @throws IllegalArgumentException if the set of file names in the plan is not equal to the set
 *         of file names found in the directory
 * @throws RuntimeException wrapping any exception raised while pre-populating the extent cache
 */
private SortedMap<KeyExtent, Files> computeMappingFromPlan(FileSystem fs, TableId tableId, Path srcPath, int maxTablets) throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
    // Index the plan's destinations by the file each one refers to.
    Map<String, List<Destination>> destsByFile = plan.getDestinations().stream().collect(groupingBy(Destination::getFileName));

    // List the source directory (minus the bulk load mapping file) and record file lengths.
    List<FileStatus> listedFiles = filterInvalid(fs.listStatus(srcPath, p -> !p.getName().equals(Constants.BULK_LOAD_MAPPING)));
    Map<String, Long> lengthByFile = getFileLenMap(listedFiles);

    // The plan must name exactly the files that are present in the directory.
    Set<String> planFiles = destsByFile.keySet();
    Set<String> dirFiles = lengthByFile.keySet();
    if (!planFiles.equals(dirFiles)) {
        throw new IllegalArgumentException("Load plan files differ from directory files, symmetric difference : " + Sets.symmetricDifference(planFiles, dirFiles));
    }

    KeyExtentCache extentCache = new ConcurrentKeyExtentCache(tableId, context);

    // Warm the cache with the non-null start and end rows of every FILE-range destination.
    // Rows are looked up sorted and de-duplicated; sorted order leverages read ahead.
    Stream<Text> boundaryRows = destsByFile.values().stream()
        .flatMap(List::stream)
        .filter(d -> d.getRangeType() == RangeType.FILE)
        .flatMap(d -> Stream.of(d.getStartRow(), d.getEndRow()))
        .filter(Objects::nonNull)
        .map(Text::new)
        .sorted()
        .distinct();
    boundaryRows.forEach(boundaryRow -> {
        try {
            extentCache.lookup(boundaryRow);
        } catch (Exception e) {
            // Checked exceptions cannot escape the lambda, so rethrow unchecked with the cause.
            throw new RuntimeException(e);
        }
    });

    SortedMap<KeyExtent, Files> filesByExtent = new TreeMap<>();
    for (Entry<String, List<Destination>> planEntry : destsByFile.entrySet()) {
        String fileName = planEntry.getKey();
        Set<KeyExtent> targetExtents = mapDestinationsToExtents(tableId, extentCache, planEntry.getValue());
        int tabletCount = targetExtents.size();

        log.debug("The file {} mapped to {} tablets.", fileName, tabletCount);
        checkTabletCount(maxTablets, tabletCount, fileName);

        // Spread the file's length evenly over the extents it maps to, truncating to a long.
        long fileLength = lengthByFile.get(fileName);
        long estSize = (long) (fileLength / (double) tabletCount);

        for (KeyExtent extent : targetExtents) {
            Files extentFiles = filesByExtent.computeIfAbsent(extent, k -> new Files());
            // NOTE(review): the trailing 0 is presumably an estimated entry count; confirm
            // against the FileInfo constructor.
            extentFiles.add(new FileInfo(fileName, estSize, 0));
        }
    }
    return mergeOverlapping(filesByExtent);
}
Also used : TableId(org.apache.accumulo.core.data.TableId) ByteSequence(org.apache.accumulo.core.data.ByteSequence) Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) LoggerFactory(org.slf4j.LoggerFactory) Text(org.apache.hadoop.io.Text) FileStatus(org.apache.hadoop.fs.FileStatus) ByteBuffer(java.nio.ByteBuffer) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) CachableBlockFile.pathToCacheId(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.pathToCacheId) ConfigurationTypeHelper(org.apache.accumulo.core.conf.ConfigurationTypeHelper) FileOperations(org.apache.accumulo.core.file.FileOperations) AccumuloBulkMergeException(org.apache.accumulo.core.clientImpl.AccumuloBulkMergeException) TableOperationsImpl(org.apache.accumulo.core.clientImpl.TableOperationsImpl) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) FileInfo(org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo) Property(org.apache.accumulo.core.conf.Property) LoadPlan(org.apache.accumulo.core.data.LoadPlan) ClientContext(org.apache.accumulo.core.clientImpl.ClientContext) Collection(java.util.Collection) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) ThreadPools(org.apache.accumulo.core.util.threads.ThreadPools) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) RangeType(org.apache.accumulo.core.data.LoadPlan.RangeType) FileNotFoundException(java.io.FileNotFoundException) Sets(com.google.common.collect.Sets) VolumeConfiguration(org.apache.accumulo.core.volume.VolumeConfiguration) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) Entry(java.util.Map.Entry) Files(org.apache.accumulo.core.clientImpl.bulk.Bulk.Files) ImportDestinationArguments(org.apache.accumulo.core.client.admin.TableOperations.ImportDestinationArguments) ImportMappingOptions(org.apache.accumulo.core.client.admin.TableOperations.ImportMappingOptions) 
CacheBuilder(com.google.common.cache.CacheBuilder) SortedMap(java.util.SortedMap) FilenameUtils(org.apache.commons.io.FilenameUtils) Collectors.groupingBy(java.util.stream.Collectors.groupingBy) Destination(org.apache.accumulo.core.data.LoadPlan.Destination) MINUTES(java.util.concurrent.TimeUnit.MINUTES) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Key(org.apache.accumulo.core.data.Key) ExecutorService(java.util.concurrent.ExecutorService) EXISTING_TABLE_NAME(org.apache.accumulo.core.util.Validators.EXISTING_TABLE_NAME) Retry(org.apache.accumulo.fate.util.Retry) Logger(org.slf4j.Logger) CryptoService(org.apache.accumulo.core.spi.crypto.CryptoService) Executor(java.util.concurrent.Executor) UTF_8(java.nio.charset.StandardCharsets.UTF_8) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) IOException(java.io.IOException) Constants(org.apache.accumulo.core.Constants) CryptoServiceFactory(org.apache.accumulo.core.crypto.CryptoServiceFactory) AccumuloException(org.apache.accumulo.core.client.AccumuloException) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) Range(org.apache.accumulo.core.data.Range) ExecutionException(java.util.concurrent.ExecutionException) TreeMap(java.util.TreeMap) Preconditions(com.google.common.base.Preconditions) Cache(com.google.common.cache.Cache) Collections(java.util.Collections) ClientProperty(org.apache.accumulo.core.conf.ClientProperty) Destination(org.apache.accumulo.core.data.LoadPlan.Destination) FileStatus(org.apache.hadoop.fs.FileStatus) TreeMap(java.util.TreeMap) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) AccumuloBulkMergeException(org.apache.accumulo.core.clientImpl.AccumuloBulkMergeException) 
CompletionException(java.util.concurrent.CompletionException) FileNotFoundException(java.io.FileNotFoundException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) IOException(java.io.IOException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) ExecutionException(java.util.concurrent.ExecutionException) FileInfo(org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo) Objects(java.util.Objects) List(java.util.List) ArrayList(java.util.ArrayList) Files(org.apache.accumulo.core.clientImpl.bulk.Bulk.Files)

Aggregations

Preconditions (com.google.common.base.Preconditions)1 Cache (com.google.common.cache.Cache)1 CacheBuilder (com.google.common.cache.CacheBuilder)1 Sets (com.google.common.collect.Sets)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 ByteBuffer (java.nio.ByteBuffer)1 UTF_8 (java.nio.charset.StandardCharsets.UTF_8)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 Objects (java.util.Objects)1 Set (java.util.Set)1 SortedMap (java.util.SortedMap)1