Example 1 with FileIdsAndEndings

Use of io.datarouter.filesystem.snapshot.writer.BlockQueue.FileIdsAndEndings in project datarouter by hotpads.

The class SnapshotBlockWriter, method flushBranch:

private void flushBranch(BranchBlockEncoder encoder) {
    tracker.branchTasks.increment();
    Map<Integer, Future<?>> futureByBlockId = branchFutureByBlockIdByLevel.computeIfAbsent(encoder.level(), ConcurrentHashMap::new);
    Future<?> future = exec.submit(() -> {
        int firstChildBlockId = encoder.firstChildBlockId();
        int numRecords = encoder.numRecords();
        // includes final endings from previous block
        int numEndings = numRecords + 1;
        FileIdsAndEndings fileIdsAndEndings;
        if (encoder.level() == 0) {
            // -1 to get the previous block's info
            fileIdsAndEndings = fileWriter.leafFileInfo(firstChildBlockId - 1, numEndings);
        } else {
            int childLevel = encoder.level() - 1;
            // -1 to get the previous block's info
            fileIdsAndEndings = fileWriter.branchFileInfo(childLevel, firstChildBlockId - 1, numEndings);
        }
        EncodedBlock encodedPages = encoder.encode(fileIdsAndEndings);
        CompressedBlock compressedBlock = config.branchBlockCompressor.compress(encodedPages, config.compressorConcatChunks);
        tracker.branchBlock(encodedPages, compressedBlock);
        fileWriter.addBranchBlock(encoder.level(), encoder.blockId(), compressedBlock);
        if (blockStorage != null && config.updateCache) {
            CacheBlockKey cacheBlockKey = CacheBlockKey.branch(snapshotKey, encoder.level(), encoder.blockId());
            blockStorage.addBranchBlock(paths, cacheBlockKey, compressedBlock);
        }
        futureByBlockId.remove(encoder.blockId());
        tracker.branchTasks.decrement();
    });
    futureByBlockId.put(encoder.blockId(), future);
}
Also used: EncodedBlock(io.datarouter.filesystem.snapshot.encode.EncodedBlock) CompressedBlock(io.datarouter.filesystem.snapshot.compress.CompressedBlock) Future(java.util.concurrent.Future) CacheBlockKey(io.datarouter.filesystem.snapshot.storage.block.CacheBlockKey) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) FileIdsAndEndings(io.datarouter.filesystem.snapshot.writer.BlockQueue.FileIdsAndEndings)
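
Note: flushBranch only consumes the FileIdsAndEndings returned by fileWriter.leafFileInfo or fileWriter.branchFileInfo; the test in Example 3 below shows that the class can also be constructed directly from two int arrays. As a rough illustration of that constructor combined with the numEndings arithmetic above, a hypothetical helper might look like the following. Which array holds file ids and which holds byte endings is an assumption based on the class name, and the zero-filled arrays are placeholders, not real block references.

import io.datarouter.filesystem.snapshot.writer.BlockQueue.FileIdsAndEndings;

public class BranchFileInfoSketch {

    // Hypothetical helper (not part of datarouter): build a placeholder
    // FileIdsAndEndings sized the way the production code above sizes it.
    public static FileIdsAndEndings placeholderFor(int numRecords) {
        // +1 carries over the previous block's final ending, as in flushBranch
        int numEndings = numRecords + 1;
        int[] fileIds = new int[numEndings]; // assumed: ids of the backing files
        int[] endings = new int[numEndings]; // assumed: byte endings within those files
        return new FileIdsAndEndings(fileIds, endings);
    }
}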

Example 2 with FileIdsAndEndings

Use of io.datarouter.filesystem.snapshot.writer.BlockQueue.FileIdsAndEndings in project datarouter by hotpads.

The class SnapshotBlockWriter, method flushLeaf:

private void flushLeaf(LeafBlockEncoder encoder) {
    leafBackpressure();
    tracker.leafTasks.increment();
    Future<?> future = exec.submit(() -> {
        if (config.sorted) {
            // primary writer thread only validates sorting between key blocks
            encoder.assertKeysSorted();
        }
        var fileIdsAndEndings = new FileIdsAndEndings[config.numColumns];
        for (int column = 0; column < config.numColumns; ++column) {
            int firstValueBlockId = encoder.firstValueBlockId(column);
            int numValueBlocks = encoder.numValueBlocks(column);
            // +1 for previous block final endings
            int numEndings = numValueBlocks + 1;
            // -1 to get the previous block's info
            fileIdsAndEndings[column] = fileWriter.valueFileInfo(column, firstValueBlockId - 1, numEndings);
        }
        EncodedBlock encodedPages = encoder.encode(fileIdsAndEndings);
        CompressedBlock compressedBytes = config.leafBlockCompressor.compress(encodedPages, config.compressorConcatChunks);
        tracker.leafBlock(encodedPages, compressedBytes);
        fileWriter.addLeafBlock(encoder.blockId(), compressedBytes);
        if (blockStorage != null && config.updateCache) {
            CacheBlockKey cacheBlockKey = CacheBlockKey.leaf(snapshotKey, encoder.blockId());
            blockStorage.addLeafBlock(paths, cacheBlockKey, compressedBytes);
        }
        leafFutureByBlockId.remove(encoder.blockId());
        tracker.leafTasks.decrement();
    });
    leafFutureByBlockId.put(encoder.blockId(), future);
}
Also used: EncodedBlock(io.datarouter.filesystem.snapshot.encode.EncodedBlock) CompressedBlock(io.datarouter.filesystem.snapshot.compress.CompressedBlock) CacheBlockKey(io.datarouter.filesystem.snapshot.storage.block.CacheBlockKey) FileIdsAndEndings(io.datarouter.filesystem.snapshot.writer.BlockQueue.FileIdsAndEndings)
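
Note: on the leaf path one FileIdsAndEndings is built per value column and the whole array is passed to the encoder, matching the LeafBlockV1Encoder.encode(FileIdsAndEndings[]) call in Example 3. A condensed, hypothetical sketch of that per-column assembly with placeholder values follows; the production code obtains each entry from fileWriter.valueFileInfo instead.

import io.datarouter.filesystem.snapshot.writer.BlockQueue.FileIdsAndEndings;

public class LeafFileInfoSketch {

    // Hypothetical helper (not part of datarouter): one placeholder entry per
    // value column, each sized numValueBlocks + 1 as in flushLeaf above.
    public static FileIdsAndEndings[] placeholderColumns(int numColumns, int numValueBlocks) {
        var fileIdsAndEndings = new FileIdsAndEndings[numColumns];
        for (int column = 0; column < numColumns; ++column) {
            // +1 for the previous block's final ending
            int numEndings = numValueBlocks + 1;
            fileIdsAndEndings[column] = new FileIdsAndEndings(new int[numEndings], new int[numEndings]);
        }
        return fileIdsAndEndings;
    }
}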

Example 3 with FileIdsAndEndings

Use of io.datarouter.filesystem.snapshot.writer.BlockQueue.FileIdsAndEndings in project datarouter by hotpads.

The class WordTests, method testLeafBlockV1:

@Test
public void testLeafBlockV1() {
    Supplier<LeafBlockV1Encoder> encoderSupplier = () -> new LeafBlockV1Encoder(32 * 1024);
    Ref<LeafBlockV1Encoder> encoder = new Ref<>(encoderSupplier.get());
    int blockSize = 4096;
    String valuePrefix = "val_";
    List<SnapshotEntry> inputs = WordDataset.scanWords(getClass().getSimpleName() + "-testLeafBlockV1").map(word -> {
        byte[] keyBytes = word.getBytes(StandardCharsets.UTF_8);
        byte[] valueBytes = ByteTool.concat(valuePrefix.getBytes(StandardCharsets.UTF_8), keyBytes);
        return new SnapshotEntry(keyBytes, valueBytes, ByteTool.EMPTY_ARRAY_2);
    }).list();
    var keyId = new AtomicLong();
    List<byte[]> blocks = Scanner.of(inputs).concat(entry -> {
        // TODO use real value block references
        encoder.get().add(0, keyId.getAndIncrement(), entry, new int[] { 0 }, new int[] { 0 });
        if (encoder.get().numBytes() >= blockSize) {
            var fileIdsAndEndings = new FileIdsAndEndings[] { new FileIdsAndEndings(new int[] { 0 }, new int[] { 0 }) };
            byte[] block = encoder.get().encode(fileIdsAndEndings).concat();
            encoder.set(encoderSupplier.get());
            return Scanner.of(block);
        }
        return Scanner.empty();
    }).list();
    if (encoder.get().numRecords() > 0) {
        var fileIdsAndEndings = new FileIdsAndEndings[] { new FileIdsAndEndings(new int[] { 0 }, new int[] { 0 }) };
        blocks.add(encoder.get().encode(fileIdsAndEndings).concat());
    }
    logger.warn("encoded {} key blocks", blocks.size());
    List<SnapshotLeafRecord> outputs = Scanner.of(blocks).map(LeafBlockV1::new).concat(LeafBlock::leafRecords).list();
    Require.equals(outputs.size(), inputs.size());
    for (int i = 0; i < outputs.size(); ++i) {
        if (!Arrays.equals(outputs.get(i).key, inputs.get(i).key())) {
            logger.warn("actual=[{}] expected=[{}]", outputs.get(i), inputs.get(i));
            String message = String.format("key: actual=[%s] does not equal expected=[%s]", CsvIntByteStringCodec.INSTANCE.encode(outputs.get(i).key), CsvIntByteStringCodec.INSTANCE.encode(inputs.get(i).key()));
            throw new IllegalArgumentException(message);
        }
        if (!Arrays.equals(outputs.get(i).value, inputs.get(i).value())) {
            logger.warn("actual=[{}] expected=[{}]", outputs.get(i), inputs.get(i));
            String message = String.format("value: actual=[%s] does not equal expected=[%s]", CsvIntByteStringCodec.INSTANCE.encode(outputs.get(i).value), CsvIntByteStringCodec.INSTANCE.encode(inputs.get(i).value()));
            throw new IllegalArgumentException(message);
        }
    }
}
Also used: ValueBlockV1(io.datarouter.filesystem.snapshot.block.value.ValueBlockV1) RetainingGroup(io.datarouter.scanner.RetainingGroup) Scanner(io.datarouter.scanner.Scanner) Arrays(java.util.Arrays) LeafBlockV1Encoder(io.datarouter.filesystem.snapshot.block.leaf.LeafBlockV1Encoder) FileIdsAndEndings(io.datarouter.filesystem.snapshot.writer.BlockQueue.FileIdsAndEndings) ByteTool(io.datarouter.bytes.ByteTool) ObjectTool(io.datarouter.util.lang.ObjectTool) LoggerFactory(org.slf4j.LoggerFactory) Test(org.testng.annotations.Test) Supplier(java.util.function.Supplier) LeafBlock(io.datarouter.filesystem.snapshot.block.leaf.LeafBlock) EmptyArray(io.datarouter.bytes.EmptyArray) Assert(org.testng.Assert) SnapshotLeafRecord(io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord) Logger(org.slf4j.Logger) CsvIntByteStringCodec(io.datarouter.bytes.codec.bytestringcodec.CsvIntByteStringCodec) ValueBlockV1Encoder(io.datarouter.filesystem.snapshot.block.value.ValueBlockV1Encoder) SnapshotEntry(io.datarouter.filesystem.snapshot.entry.SnapshotEntry) LeafBlockV1(io.datarouter.filesystem.snapshot.block.leaf.LeafBlockV1) StandardCharsets(java.nio.charset.StandardCharsets) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) ValueBlock(io.datarouter.filesystem.snapshot.block.value.ValueBlock) Ref(io.datarouter.scanner.Ref) Require(io.datarouter.util.Require)

Aggregations

FileIdsAndEndings (io.datarouter.filesystem.snapshot.writer.BlockQueue.FileIdsAndEndings) 3
CompressedBlock (io.datarouter.filesystem.snapshot.compress.CompressedBlock) 2
EncodedBlock (io.datarouter.filesystem.snapshot.encode.EncodedBlock) 2
CacheBlockKey (io.datarouter.filesystem.snapshot.storage.block.CacheBlockKey) 2
ByteTool (io.datarouter.bytes.ByteTool) 1
EmptyArray (io.datarouter.bytes.EmptyArray) 1
CsvIntByteStringCodec (io.datarouter.bytes.codec.bytestringcodec.CsvIntByteStringCodec) 1
LeafBlock (io.datarouter.filesystem.snapshot.block.leaf.LeafBlock) 1
LeafBlockV1 (io.datarouter.filesystem.snapshot.block.leaf.LeafBlockV1) 1
LeafBlockV1Encoder (io.datarouter.filesystem.snapshot.block.leaf.LeafBlockV1Encoder) 1
ValueBlock (io.datarouter.filesystem.snapshot.block.value.ValueBlock) 1
ValueBlockV1 (io.datarouter.filesystem.snapshot.block.value.ValueBlockV1) 1
ValueBlockV1Encoder (io.datarouter.filesystem.snapshot.block.value.ValueBlockV1Encoder) 1
SnapshotEntry (io.datarouter.filesystem.snapshot.entry.SnapshotEntry) 1
SnapshotLeafRecord (io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord) 1
Ref (io.datarouter.scanner.Ref) 1
RetainingGroup (io.datarouter.scanner.RetainingGroup) 1
Scanner (io.datarouter.scanner.Scanner) 1
Require (io.datarouter.util.Require) 1
ObjectTool (io.datarouter.util.lang.ObjectTool) 1