Search in sources :

Example 1 with FileInfo

Use of org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo in the Apache Accumulo project.

From the class BulkImportTest, the method addMapping:

/**
 * Registers one tablet-to-files entry in {@code mappings}: a KeyExtent for table "42"
 * spanning (prevRow, endRow], holding one FileInfo per supplied file name.
 *
 * <p>A null {@code prevRow}/{@code endRow} becomes a null Text — presumably marking an
 * unbounded tablet boundary (NOTE(review): per KeyExtent convention, confirm).
 * Size and entry estimates are fixed at 2; callers only compare file names.
 */
private void addMapping(SortedMap<KeyExtent, Files> mappings, String prevRow, String endRow, String... fileNames) {
    Text end = (endRow == null) ? null : new Text(endRow);
    Text prev = (prevRow == null) ? null : new Text(prevRow);
    KeyExtent extent = new KeyExtent(TableId.of("42"), end, prev);

    Files bulkFiles = new Files();
    for (String fileName : fileNames) {
        bulkFiles.add(new FileInfo(fileName, 2, 2));
    }

    mappings.put(extent, bulkFiles);
}
Also used : FileInfo(org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo) Text(org.apache.hadoop.io.Text) Files(org.apache.accumulo.core.clientImpl.bulk.Bulk.Files) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent)

Example 2 with FileInfo

Use of org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo in the Apache Accumulo project.

From the class BulkSerializeTest, the method testRemap:

@Test
public void testRemap() throws Exception {
    TableId tableId = TableId.of("3");
    SortedMap<KeyExtent, Bulk.Files> original = generateMapping(tableId);

    // Build the expected post-rename mapping: every file name gains an "N" prefix,
    // and renames records old-name -> new-name for the rename map.
    Map<String, String> renames = new HashMap<>();
    SortedMap<KeyExtent, Bulk.Files> expected = new TreeMap<>();
    original.forEach((extent, files) -> {
        Files renamed = new Files();
        files.forEach(info -> {
            String newName = "N" + info.name;
            renamed.add(new FileInfo(newName, info.estSize, info.estEntries));
            renames.put(info.name, newName);
        });
        expected.put(extent, renamed);
    });

    // Serialize the rename map and the ORIGINAL load mapping into memory buffers.
    ByteArrayOutputStream mappingBytes = new ByteArrayOutputStream();
    ByteArrayOutputStream renameBytes = new ByteArrayOutputStream();
    BulkSerialize.writeRenameMap(renames, "/some/dir", p -> renameBytes);
    BulkSerialize.writeLoadMapping(original, "/some/dir", p -> mappingBytes);

    // Fake input: serve back whichever serialized blob matches the requested file name.
    Input input = path -> {
        String name = path.getName();
        if (name.equals(Constants.BULK_LOAD_MAPPING)) {
            return new ByteArrayInputStream(mappingBytes.toByteArray());
        }
        if (name.equals(Constants.BULK_RENAME_FILE)) {
            return new ByteArrayInputStream(renameBytes.toByteArray());
        }
        throw new IllegalArgumentException("bad path " + path);
    };

    // getUpdatedLoadMapping should apply the rename map, yielding the expected mapping.
    try (LoadMappingIterator iter = BulkSerialize.getUpdatedLoadMapping("/some/dir", tableId, input)) {
        SortedMap<KeyExtent, Bulk.Files> actual = new TreeMap<>();
        iter.forEachRemaining(entry -> actual.put(entry.getKey(), entry.getValue()));
        assertEquals(expected, actual);
    }
}
Also used : TableId(org.apache.accumulo.core.data.TableId) TableId(org.apache.accumulo.core.data.TableId) ByteArrayOutputStream(java.io.ByteArrayOutputStream) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) Text(org.apache.hadoop.io.Text) HashMap(java.util.HashMap) Constants(org.apache.accumulo.core.Constants) Test(org.junit.jupiter.api.Test) ByteArrayInputStream(java.io.ByteArrayInputStream) TreeMap(java.util.TreeMap) Map(java.util.Map) Input(org.apache.accumulo.core.clientImpl.bulk.BulkSerialize.Input) FileInfo(org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo) Files(org.apache.accumulo.core.clientImpl.bulk.Bulk.Files) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TreeMap(java.util.TreeMap) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) Input(org.apache.accumulo.core.clientImpl.bulk.BulkSerialize.Input) FileInfo(org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo) ByteArrayInputStream(java.io.ByteArrayInputStream) Files(org.apache.accumulo.core.clientImpl.bulk.Bulk.Files) Test(org.junit.jupiter.api.Test)

Example 3 with FileInfo

Use of org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo in the Apache Accumulo project.

From the class BulkImport, the method computeMappingFromPlan:

/**
 * Computes the tablet-to-file mapping for a bulk import from the caller-supplied
 * {@code plan} rather than by inspecting file contents.
 *
 * <p>Validates that the plan's file names exactly match the files present under
 * {@code srcPath}, resolves each file's planned row ranges to tablet extents via a
 * shared extent cache, and returns the per-tablet file lists merged through
 * {@code mergeOverlapping}.
 *
 * @param fs filesystem used to list the source directory
 * @param tableId table the data is being imported into
 * @param srcPath directory containing the bulk import files
 * @param maxTablets upper bound on tablets a single file may map to (enforced by
 *        {@code checkTabletCount})
 * @return sorted mapping from tablet extent to the files (with size estimates) to load there
 * @throws IllegalArgumentException if the plan's files and the directory's files differ
 */
private SortedMap<KeyExtent, Files> computeMappingFromPlan(FileSystem fs, TableId tableId, Path srcPath, int maxTablets) throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
    // Group the plan's destinations by file name so each file's ranges are handled together.
    Map<String, List<Destination>> fileDestinations = plan.getDestinations().stream().collect(groupingBy(Destination::getFileName));
    // List actual files in the source dir, skipping the bulk load mapping metadata file.
    List<FileStatus> statuses = filterInvalid(fs.listStatus(srcPath, p -> !p.getName().equals(Constants.BULK_LOAD_MAPPING)));
    Map<String, Long> fileLens = getFileLenMap(statuses);
    // The plan must cover exactly the files present — no extras, none missing.
    if (!fileDestinations.keySet().equals(fileLens.keySet())) {
        throw new IllegalArgumentException("Load plan files differ from directory files, symmetric difference : " + Sets.symmetricDifference(fileDestinations.keySet(), fileLens.keySet()));
    }
    KeyExtentCache extentCache = new ConcurrentKeyExtentCache(tableId, context);
    // Pre-populate cache by looking up all end rows in sorted order. Doing this in sorted order
    // leverages read ahead.
    fileDestinations.values().stream().flatMap(List::stream).filter(dest -> dest.getRangeType() == RangeType.FILE).flatMap(dest -> Stream.of(dest.getStartRow(), dest.getEndRow())).filter(Objects::nonNull).map(Text::new).sorted().distinct().forEach(row -> {
        try {
            extentCache.lookup(row);
        } catch (Exception e) {
            // The lambda cannot propagate checked exceptions; rethrow unchecked.
            throw new RuntimeException(e);
        }
    });
    SortedMap<KeyExtent, Files> mapping = new TreeMap<>();
    for (Entry<String, List<Destination>> entry : fileDestinations.entrySet()) {
        String fileName = entry.getKey();
        List<Destination> destinations = entry.getValue();
        Set<KeyExtent> extents = mapDestinationsToExtents(tableId, extentCache, destinations);
        log.debug("The file {} mapped to {} tablets.", fileName, extents.size());
        checkTabletCount(maxTablets, extents.size(), fileName);
        // Apportion the file's byte length evenly across its tablets as a rough size estimate.
        long estSize = (long) (fileLens.get(fileName) / (double) extents.size());
        for (KeyExtent keyExtent : extents) {
            // Entry-count estimate is 0 — unknown without reading the file.
            mapping.computeIfAbsent(keyExtent, k -> new Files()).add(new FileInfo(fileName, estSize, 0));
        }
    }
    return mergeOverlapping(mapping);
}
Also used : TableId(org.apache.accumulo.core.data.TableId) ByteSequence(org.apache.accumulo.core.data.ByteSequence) Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) LoggerFactory(org.slf4j.LoggerFactory) Text(org.apache.hadoop.io.Text) FileStatus(org.apache.hadoop.fs.FileStatus) ByteBuffer(java.nio.ByteBuffer) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) CachableBlockFile.pathToCacheId(org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.pathToCacheId) ConfigurationTypeHelper(org.apache.accumulo.core.conf.ConfigurationTypeHelper) FileOperations(org.apache.accumulo.core.file.FileOperations) AccumuloBulkMergeException(org.apache.accumulo.core.clientImpl.AccumuloBulkMergeException) TableOperationsImpl(org.apache.accumulo.core.clientImpl.TableOperationsImpl) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) FileInfo(org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo) Property(org.apache.accumulo.core.conf.Property) LoadPlan(org.apache.accumulo.core.data.LoadPlan) ClientContext(org.apache.accumulo.core.clientImpl.ClientContext) Collection(java.util.Collection) FileSKVIterator(org.apache.accumulo.core.file.FileSKVIterator) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) ThreadPools(org.apache.accumulo.core.util.threads.ThreadPools) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) RangeType(org.apache.accumulo.core.data.LoadPlan.RangeType) FileNotFoundException(java.io.FileNotFoundException) Sets(com.google.common.collect.Sets) VolumeConfiguration(org.apache.accumulo.core.volume.VolumeConfiguration) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) Entry(java.util.Map.Entry) Files(org.apache.accumulo.core.clientImpl.bulk.Bulk.Files) ImportDestinationArguments(org.apache.accumulo.core.client.admin.TableOperations.ImportDestinationArguments) ImportMappingOptions(org.apache.accumulo.core.client.admin.TableOperations.ImportMappingOptions) 
CacheBuilder(com.google.common.cache.CacheBuilder) SortedMap(java.util.SortedMap) FilenameUtils(org.apache.commons.io.FilenameUtils) Collectors.groupingBy(java.util.stream.Collectors.groupingBy) Destination(org.apache.accumulo.core.data.LoadPlan.Destination) MINUTES(java.util.concurrent.TimeUnit.MINUTES) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Key(org.apache.accumulo.core.data.Key) ExecutorService(java.util.concurrent.ExecutorService) EXISTING_TABLE_NAME(org.apache.accumulo.core.util.Validators.EXISTING_TABLE_NAME) Retry(org.apache.accumulo.fate.util.Retry) Logger(org.slf4j.Logger) CryptoService(org.apache.accumulo.core.spi.crypto.CryptoService) Executor(java.util.concurrent.Executor) UTF_8(java.nio.charset.StandardCharsets.UTF_8) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) IOException(java.io.IOException) Constants(org.apache.accumulo.core.Constants) CryptoServiceFactory(org.apache.accumulo.core.crypto.CryptoServiceFactory) AccumuloException(org.apache.accumulo.core.client.AccumuloException) AccumuloConfiguration(org.apache.accumulo.core.conf.AccumuloConfiguration) Range(org.apache.accumulo.core.data.Range) ExecutionException(java.util.concurrent.ExecutionException) TreeMap(java.util.TreeMap) Preconditions(com.google.common.base.Preconditions) Cache(com.google.common.cache.Cache) Collections(java.util.Collections) ClientProperty(org.apache.accumulo.core.conf.ClientProperty) Destination(org.apache.accumulo.core.data.LoadPlan.Destination) FileStatus(org.apache.hadoop.fs.FileStatus) TreeMap(java.util.TreeMap) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) AccumuloBulkMergeException(org.apache.accumulo.core.clientImpl.AccumuloBulkMergeException) 
CompletionException(java.util.concurrent.CompletionException) FileNotFoundException(java.io.FileNotFoundException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) IOException(java.io.IOException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) ExecutionException(java.util.concurrent.ExecutionException) FileInfo(org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo) Objects(java.util.Objects) List(java.util.List) ArrayList(java.util.ArrayList) Files(org.apache.accumulo.core.clientImpl.bulk.Bulk.Files)

Aggregations

FileInfo (org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo)3 Files (org.apache.accumulo.core.clientImpl.bulk.Bulk.Files)3 KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent)3 Text (org.apache.hadoop.io.Text)3 HashMap (java.util.HashMap)2 Map (java.util.Map)2 SortedMap (java.util.SortedMap)2 TreeMap (java.util.TreeMap)2 Constants (org.apache.accumulo.core.Constants)2 TableId (org.apache.accumulo.core.data.TableId)2 Preconditions (com.google.common.base.Preconditions)1 Cache (com.google.common.cache.Cache)1 CacheBuilder (com.google.common.cache.CacheBuilder)1 Sets (com.google.common.collect.Sets)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 ByteBuffer (java.nio.ByteBuffer)1 UTF_8 (java.nio.charset.StandardCharsets.UTF_8)1