Search in sources :

Example 6 with Files

use of org.apache.accumulo.core.clientImpl.bulk.Bulk.Files in project accumulo by apache.

The following example is from the class LoadFiles, method loadFiles.

/**
 * Issues asynchronous load calls for every tablet that overlaps an entry in the bulk load
 * mapping. While iterating the mapping, this scans the metadata table for tablet range and
 * location information. Returns a suggested sleep time for isReady, scaled by a factor of the
 * tablet server holding the most tablets; returns 0 once all files have been loaded.
 */
private long loadFiles(TableId tableId, Path bulkDir, LoadMappingIterator loadMapIter, Manager manager, long tid) throws Exception {
    PeekingIterator<Map.Entry<KeyExtent, Bulk.Files>> mappingIter = new PeekingIterator<>(loadMapIter);
    // The first mapping entry determines where the metadata scan starts.
    Text firstRow = mappingIter.peek().getKey().prevEndRow();
    Iterator<TabletMetadata> tabletIter = TabletsMetadata.builder(manager.getContext())
        .forTable(tableId)
        .overlapping(firstRow, null)
        .checkConsistency()
        .fetch(PREV_ROW, LOCATION, LOADED)
        .build()
        .iterator();
    // Choose the loader implementation based on whether the table is online.
    Loader loader = (bulkInfo.tableState == TableState.ONLINE) ? new OnlineLoader() : new OfflineLoader();
    loader.start(bulkDir, manager, tid, bulkInfo.setTime);
    long scanStart = System.currentTimeMillis();
    while (mappingIter.hasNext()) {
        Map.Entry<KeyExtent, Bulk.Files> entry = mappingIter.next();
        loader.load(findOverlappingTablets(entry.getKey(), tabletIter), entry.getValue());
    }
    long sleepTime = loader.finish();
    if (sleepTime > 0) {
        // Scale the sleep by the metadata scan time (capped at 30s) so that slow scans
        // result in longer back-off before the next isReady check.
        long cappedScanTime = Math.min(System.currentTimeMillis() - scanStart, 30000);
        sleepTime = Math.max(sleepTime, cappedScanTime * 2);
    }
    return sleepTime;
}
Also used : Text(org.apache.hadoop.io.Text) PeekingIterator(org.apache.accumulo.core.util.PeekingIterator) Bulk(org.apache.accumulo.core.clientImpl.bulk.Bulk) TKeyExtent(org.apache.accumulo.core.dataImpl.thrift.TKeyExtent) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) TabletMetadata(org.apache.accumulo.core.metadata.schema.TabletMetadata) Files(org.apache.accumulo.core.clientImpl.bulk.Bulk.Files) Map(java.util.Map) HashMap(java.util.HashMap)

Example 7 with Files

use of org.apache.accumulo.core.clientImpl.bulk.Bulk.Files in project accumulo by apache.

The following example is from the class BulkSerializeTest, method testRemap.

/**
 * Verifies that getUpdatedLoadMapping applies a rename map to a serialized load mapping,
 * producing a mapping in which every file carries its new ("N"-prefixed) name.
 */
@Test
public void testRemap() throws Exception {
    TableId tableId = TableId.of("3");
    SortedMap<KeyExtent, Bulk.Files> original = generateMapping(tableId);

    // Build the expected renamed mapping and the old->new name map in a single pass.
    SortedMap<KeyExtent, Bulk.Files> expected = new TreeMap<>();
    Map<String, String> renames = new HashMap<>();
    original.forEach((extent, files) -> {
        Files renamed = new Files();
        files.forEach(fileInfo -> {
            String newName = "N" + fileInfo.name;
            renamed.add(new FileInfo(newName, fileInfo.estSize, fileInfo.estEntries));
            renames.put(fileInfo.name, newName);
        });
        expected.put(extent, renamed);
    });

    // Serialize the load mapping and the rename map into in-memory buffers.
    ByteArrayOutputStream mappingBytes = new ByteArrayOutputStream();
    ByteArrayOutputStream renameBytes = new ByteArrayOutputStream();
    BulkSerialize.writeRenameMap(renames, "/some/dir", p -> renameBytes);
    BulkSerialize.writeLoadMapping(original, "/some/dir", p -> mappingBytes);

    // Route reads back to whichever buffer matches the requested file name.
    Input input = p -> {
        String fileName = p.getName();
        if (fileName.equals(Constants.BULK_LOAD_MAPPING)) {
            return new ByteArrayInputStream(mappingBytes.toByteArray());
        }
        if (fileName.equals(Constants.BULK_RENAME_FILE)) {
            return new ByteArrayInputStream(renameBytes.toByteArray());
        }
        throw new IllegalArgumentException("bad path " + p);
    };

    try (LoadMappingIterator lmi = BulkSerialize.getUpdatedLoadMapping("/some/dir", tableId, input)) {
        SortedMap<KeyExtent, Bulk.Files> actual = new TreeMap<>();
        lmi.forEachRemaining(e -> actual.put(e.getKey(), e.getValue()));
        assertEquals(expected, actual);
    }
}
Also used : TableId(org.apache.accumulo.core.data.TableId) TableId(org.apache.accumulo.core.data.TableId) ByteArrayOutputStream(java.io.ByteArrayOutputStream) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) Text(org.apache.hadoop.io.Text) HashMap(java.util.HashMap) Constants(org.apache.accumulo.core.Constants) Test(org.junit.jupiter.api.Test) ByteArrayInputStream(java.io.ByteArrayInputStream) TreeMap(java.util.TreeMap) Map(java.util.Map) Input(org.apache.accumulo.core.clientImpl.bulk.BulkSerialize.Input) FileInfo(org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo) Files(org.apache.accumulo.core.clientImpl.bulk.Bulk.Files) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TreeMap(java.util.TreeMap) KeyExtent(org.apache.accumulo.core.dataImpl.KeyExtent) Input(org.apache.accumulo.core.clientImpl.bulk.BulkSerialize.Input) FileInfo(org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo) ByteArrayInputStream(java.io.ByteArrayInputStream) Files(org.apache.accumulo.core.clientImpl.bulk.Bulk.Files) Test(org.junit.jupiter.api.Test)

Aggregations

Files (org.apache.accumulo.core.clientImpl.bulk.Bulk.Files)7 KeyExtent (org.apache.accumulo.core.dataImpl.KeyExtent)7 Text (org.apache.hadoop.io.Text)5 HashMap (java.util.HashMap)4 Map (java.util.Map)4 FileInfo (org.apache.accumulo.core.clientImpl.bulk.Bulk.FileInfo)4 TableId (org.apache.accumulo.core.data.TableId)4 Path (org.apache.hadoop.fs.Path)4 ByteBuffer (java.nio.ByteBuffer)3 Entry (java.util.Map.Entry)3 SortedMap (java.util.SortedMap)3 TreeMap (java.util.TreeMap)3 Preconditions (com.google.common.base.Preconditions)2 Cache (com.google.common.cache.Cache)2 CacheBuilder (com.google.common.cache.CacheBuilder)2 Sets (com.google.common.collect.Sets)2 FileNotFoundException (java.io.FileNotFoundException)2 IOException (java.io.IOException)2 UTF_8 (java.nio.charset.StandardCharsets.UTF_8)2 ArrayList (java.util.ArrayList)2