Search in sources:

Example 1 with SnapshotLeafRecord

use of io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord in project datarouter by hotpads.

the class SnapshotMerger method combineSnapshots.

/**
 * Combines the snapshots identified by {@code keys} into one snapshot written through
 * {@code outputGroup}, then deletes each input snapshot from {@code mergeGroup}.
 *
 * <p>Each input is opened with a {@link ScanningSnapshotReader}; the readers' leaf-record scans
 * are collated with {@code SnapshotLeafRecord.KEY_COMPARATOR}, consecutive records whose key
 * arrays are equal are deduplicated, and the resulting entries are written in batches of 10,000.
 *
 * @param keys the snapshots to combine; also the snapshots deleted afterwards
 * @param outputGroup the group whose write operations receive the combined entries
 * @return the result reported by the write operation
 */
private SnapshotWriteResult combineSnapshots(List<SnapshotKey> keys, SnapshotGroup outputGroup) {
    SnapshotWriteResult writeResult = Scanner.of(keys)
            .map(inputKey -> new ScanningSnapshotReader(inputKey, readExec, 10, mergeGroup, scanNumBlocks))
            .collate(inputReader -> inputReader.scanLeafRecords(0), SnapshotLeafRecord.KEY_COMPARATOR)
            .deduplicateConsecutiveBy(leaf -> leaf.key, Arrays::equals)
            .map(SnapshotLeafRecord::entry)
            .batch(10_000)
            .apply(entryBatches -> outputGroup.writeOps().write(writerConfig, entryBatches, writeExec, shouldStop));
    // The inputs are removed only after the write call above has returned.
    for (SnapshotKey mergedKey : keys) {
        mergeGroup.deleteOps().deleteSnapshot(mergedKey, writeExec, 10);
    }
    logger.warn("combined {}, {}", keys.size(), keys);
    return writeResult;
}
Also used : Scanner(io.datarouter.scanner.Scanner) Arrays(java.util.Arrays) Logger(org.slf4j.Logger) LoggerFactory(org.slf4j.LoggerFactory) SnapshotGroup(io.datarouter.filesystem.snapshot.group.SnapshotGroup) Supplier(java.util.function.Supplier) SnapshotKeyAndNumRecords(io.datarouter.filesystem.snapshot.group.dto.SnapshotKeyAndNumRecords) SnapshotWriterConfig(io.datarouter.filesystem.snapshot.writer.SnapshotWriterConfig) List(java.util.List) SnapshotWriteResult(io.datarouter.filesystem.snapshot.group.dto.SnapshotWriteResult) ScanningSnapshotReader(io.datarouter.filesystem.snapshot.reader.ScanningSnapshotReader) SnapshotLeafRecord(io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord) Map(java.util.Map) SnapshotKey(io.datarouter.filesystem.snapshot.key.SnapshotKey) ExecutorService(java.util.concurrent.ExecutorService) ScanningSnapshotReader(io.datarouter.filesystem.snapshot.reader.ScanningSnapshotReader) SnapshotLeafRecord(io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord) SnapshotWriteResult(io.datarouter.filesystem.snapshot.group.dto.SnapshotWriteResult)

Example 2 with SnapshotLeafRecord

use of io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord in project datarouter by hotpads.

the class BaseSnapshotTests method testSearches.

/**
 * Checks key-based scans of the snapshot starting at every {@code step}-th record id.
 *
 * <p>For each starting id the test looks up the record's key and verifies three scans of
 * {@code limit} records each: from the key inclusive, from the key exclusive, and from a
 * non-existent key built by appending zero bytes to the real key (expected to start at the
 * following record). Skipped when {@code TestId.SEARCHES} is not enabled.
 */
@Test
public void testSearches() {
    if (!ENABLED_TESTS.contains(TestId.SEARCHES)) {
        return;
    }
    BlockLoader blockLoader = makeBlockLoader(useMemoryCache(), shareMemoryCache());
    var reader = new ScanningSnapshotReader(snapshotKey, exec, getNumThreads(), blockLoader, SCAN_NUM_BLOCKS);
    int step = 1000;
    int limit = 1000;
    // Stop early enough that fromId + limit is still a valid index for the "plus one" lookups below.
    Scanner.iterate(0, fromId -> fromId + step)
            .advanceWhile(fromId -> fromId < sortedInputs.size() - limit)
            .parallel(new ParallelScannerContext(scanExec, getNumThreads(), true))
            .forEach(fromId -> {
        var idReader = new SnapshotIdReader(snapshotKey, blockLoader);
        // known first key inclusive
        byte[] searchKey = idReader.getRecord(fromId).key;
        List<SnapshotLeafRecord> outputsInclusive = reader.scanLeafRecords(searchKey, true).limit(limit).list();
        // Fail with a clear assertion rather than an IndexOutOfBoundsException on a short scan.
        Assert.assertEquals(outputsInclusive.size(), limit);
        for (int i = 0; i < limit; ++i) {
            Input input = sortedInputs.get(fromId + i);
            SnapshotLeafRecord output = outputsInclusive.get(i);
            // TestNG order is (actual, expected)
            Assert.assertEquals(output.id, fromId + i);
            Assert.assertEquals(new Bytes(output.key), new Bytes(input.entry.key()));
        }
        // known first key exclusive
        List<SnapshotLeafRecord> outputsExclusive = reader.scanLeafRecords(searchKey, false).limit(limit).list();
        Assert.assertEquals(outputsExclusive.size(), limit);
        for (int i = 0; i < limit; ++i) {
            // plus one because exclusive
            Input input = sortedInputs.get(fromId + i + 1);
            SnapshotLeafRecord output = outputsExclusive.get(i);
            Assert.assertEquals(output.id, input.id);
            Assert.assertEquals(new Bytes(output.key), new Bytes(input.entry.key()));
        }
        // fake first key (should act like exclusive)
        byte[] nonExistentKey = ByteTool.concat(searchKey, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 });
        List<SnapshotLeafRecord> outputsNonExistentKey = reader.scanLeafRecords(nonExistentKey, true).limit(limit).list();
        Assert.assertEquals(outputsNonExistentKey.size(), limit);
        for (int i = 0; i < limit; ++i) {
            // plus one because the first key didn't exist
            Input input = sortedInputs.get(fromId + i + 1);
            SnapshotLeafRecord output = outputsNonExistentKey.get(i);
            Assert.assertEquals(output.id, input.id);
            Assert.assertEquals(new Bytes(output.key), new Bytes(input.entry.key()));
        }
    });
}
Also used : ScanningSnapshotReader(io.datarouter.filesystem.snapshot.reader.ScanningSnapshotReader) IntStream(java.util.stream.IntStream) SnapshotRecord(io.datarouter.filesystem.snapshot.reader.record.SnapshotRecord) Scanner(io.datarouter.scanner.Scanner) Arrays(java.util.Arrays) SnapshotKeyReader(io.datarouter.filesystem.snapshot.reader.SnapshotKeyReader) ByteTool(io.datarouter.bytes.ByteTool) BlockKey(io.datarouter.filesystem.snapshot.block.BlockKey) ParallelScannerContext(io.datarouter.scanner.ParallelScannerContext) LoggerFactory(org.slf4j.LoggerFactory) Test(org.testng.annotations.Test) Bytes(io.datarouter.bytes.Bytes) GzipBlockCompressor(io.datarouter.filesystem.snapshot.compress.GzipBlockCompressor) SnapshotWriterConfig(io.datarouter.filesystem.snapshot.writer.SnapshotWriterConfig) NumberFormatter(io.datarouter.util.number.NumberFormatter) SnapshotWriteResult(io.datarouter.filesystem.snapshot.group.dto.SnapshotWriteResult) Assert(org.testng.Assert) ScanningSnapshotReader(io.datarouter.filesystem.snapshot.reader.ScanningSnapshotReader) BlockLoader(io.datarouter.filesystem.snapshot.reader.block.BlockLoader) DatarouterFilesystemModuleFactory(io.datarouter.filesystem.DatarouterFilesystemModuleFactory) SnapshotLeafRecord(io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord) SnapshotWriterConfigBuilder(io.datarouter.filesystem.snapshot.writer.SnapshotWriterConfigBuilder) ExecutorService(java.util.concurrent.ExecutorService) PhaseTimer(io.datarouter.util.timer.PhaseTimer) SnapshotIdReader(io.datarouter.filesystem.snapshot.reader.SnapshotIdReader) AfterClass(org.testng.annotations.AfterClass) Logger(org.slf4j.Logger) BeforeClass(org.testng.annotations.BeforeClass) SnapshotGroup(io.datarouter.filesystem.snapshot.group.SnapshotGroup) Set(java.util.Set) MemoryBlockCache(io.datarouter.filesystem.snapshot.cache.MemoryBlockCache) SnapshotEntry(io.datarouter.filesystem.snapshot.entry.SnapshotEntry) StandardCharsets(java.nio.charset.StandardCharsets) 
Executors(java.util.concurrent.Executors) Guice(org.testng.annotations.Guice) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Optional(java.util.Optional) SnapshotKey(io.datarouter.filesystem.snapshot.key.SnapshotKey) ListTool(io.datarouter.util.collection.ListTool) Require(io.datarouter.util.Require) Bytes(io.datarouter.bytes.Bytes) SnapshotLeafRecord(io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord) ParallelScannerContext(io.datarouter.scanner.ParallelScannerContext) BlockLoader(io.datarouter.filesystem.snapshot.reader.block.BlockLoader) SnapshotIdReader(io.datarouter.filesystem.snapshot.reader.SnapshotIdReader) Test(org.testng.annotations.Test)

Example 3 with SnapshotLeafRecord

use of io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord in project datarouter by hotpads.

the class BaseSnapshotTests method testOperationInternal.

/**
 * Runs one lookup {@code operation} against every input and throws on the first mismatch.
 *
 * <p>Inputs are processed in batches of 10,000 through a {@link ParallelScannerContext}; each
 * batch creates its own {@link SnapshotIdReader} and {@link SnapshotKeyReader} on top of the
 * supplied block loader. After each batch a progress line is logged.
 *
 * @param threadSafeBlockLoader block loader shared by the readers created for every batch
 * @param random when true iterate {@code randomInputs}, otherwise {@code sortedInputs}
 * @param multiThreaded passed to the {@link ParallelScannerContext}; also used in the progress log
 * @param operation which lookup to verify
 * @throws RuntimeException when a looked-up record is missing or differs from the input
 */
private void testOperationInternal(BlockLoader threadSafeBlockLoader, boolean random, boolean multiThreaded, Operation operation) {
    List<Input> searchKeys = random ? randomInputs : sortedInputs;
    int batchSize = 10_000;
    var parallelScannerContext = new ParallelScannerContext(exec, getNumThreads(), true, multiThreaded);
    var count = new AtomicLong();
    Scanner.of(searchKeys).batch(batchSize).parallel(parallelScannerContext).forEach(batch -> {
        var idReader = new SnapshotIdReader(snapshotKey, threadSafeBlockLoader);
        var keyReader = new SnapshotKeyReader(snapshotKey, threadSafeBlockLoader);
        for (int i = 0; i < batch.size(); ++i) {
            Input input = batch.get(i);
            long id = input.id;
            byte[] key = input.entry.key();
            byte[] value = input.entry.value();
            if (Operation.GET_LEAF_RECORD == operation) {
                SnapshotLeafRecord leafRecord = idReader.leafRecord(id);
                if (!Arrays.equals(key, leafRecord.key)) {
                    String message = String.format("%s, expected=%s, actual=%s", id, utf8(key), utf8(leafRecord.key));
                    throw new RuntimeException(message);
                }
                if (!Arrays.equals(value, leafRecord.value)) {
                    String message = String.format("%s, expected=%s, actual=%s", id, utf8(value), utf8(leafRecord.value));
                    throw new RuntimeException(message);
                }
            } else if (Operation.GET_RECORD == operation) {
                SnapshotRecord result = idReader.getRecord(id);
                if (id != result.id) {
                    String message = String.format("%s, expected=%s, actual=%s", id, id, result.id);
                    throw new RuntimeException(message);
                }
                if (!Arrays.equals(key, result.key)) {
                    String message = String.format("%s, expected=%s, actual=%s", id, utf8(key), utf8(result.key));
                    throw new RuntimeException(message);
                }
                if (!SnapshotEntry.equal(input.entry, result.entry())) {
                    // TODO print more than column 0
                    String message = String.format("%s, expected=%s, actual=%s", i,
                            utf8(input.entry.columnValues[0]), utf8(result.columnValues[0]));
                    throw new RuntimeException(message);
                }
            } else if (Operation.FIND_ID == operation) {
                // Look the key up once and reuse the result for both checks.
                var foundId = keyReader.findRecordId(key);
                if (foundId.isEmpty()) {
                    String message = String.format("%s, %s not found", i, utf8(key));
                    throw new RuntimeException(message);
                }
                long actualId = foundId.get().longValue();
                if (id != actualId) {
                    // The key was found but maps to a different id; report that instead of "not found".
                    String message = String.format("%s, %s expected id=%s, actual id=%s", i, utf8(key), id, actualId);
                    throw new RuntimeException(message);
                }
            } else if (Operation.FIND_RECORD == operation) {
                Optional<SnapshotRecord> output = keyReader.findRecord(key);
                if (output.isEmpty()) {
                    String message = String.format("%s, %s not found", i, utf8(key));
                    throw new RuntimeException(message);
                }
                if (!SnapshotEntry.equal(input.entry, output.get().entry())) {
                    // TODO print more than column 0
                    String message = String.format("%s, expected=%s, actual=%s", i,
                            utf8(input.entry.columnValues[0]), utf8(output.get().columnValues[0]));
                    throw new RuntimeException(message);
                }
            }
        }
        // Use the value returned by addAndGet so the logged total is the one produced by this
        // batch's own increment, not a later re-read of the shared counter.
        long processed = count.addAndGet(batch.size());
        logger.warn("{}, {}, {} for {}/{} {}", random ? "random" : "sorted", multiThreaded ? "multi" : "single", operation.toString().toLowerCase(), NumberFormatter.addCommas(processed), NumberFormatter.addCommas(searchKeys.size()), utf8(ListTool.getLast(batch).entry.key()));
    });
}
Also used : SnapshotLeafRecord(io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord) Optional(java.util.Optional) ParallelScannerContext(io.datarouter.scanner.ParallelScannerContext) SnapshotRecord(io.datarouter.filesystem.snapshot.reader.record.SnapshotRecord) SnapshotIdReader(io.datarouter.filesystem.snapshot.reader.SnapshotIdReader) AtomicLong(java.util.concurrent.atomic.AtomicLong) SnapshotKeyReader(io.datarouter.filesystem.snapshot.reader.SnapshotKeyReader)

Example 4 with SnapshotLeafRecord

use of io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord in project datarouter by hotpads.

the class BaseSnapshotTests method testScanLeafRecords.

/**
 * Scans all leaf records from id 0 and checks that the count, keys and values match
 * {@code sortedInputs} position by position. Skipped when {@code TestId.SCAN_LEAF_RECORDS}
 * is not enabled.
 */
@Test
public void testScanLeafRecords() {
    if (!ENABLED_TESTS.contains(TestId.SCAN_LEAF_RECORDS)) {
        return;
    }
    BlockLoader blockLoader = makeBlockLoader(useMemoryCache(), shareMemoryCache());
    var reader = new ScanningSnapshotReader(snapshotKey, exec, getNumThreads(), blockLoader, SCAN_NUM_BLOCKS);
    List<SnapshotLeafRecord> actuals = reader.scanLeafRecords(0).list();
    Assert.assertEquals(actuals.size(), sortedInputs.size());
    for (int i = 0; i < sortedInputs.size(); ++i) {
        Input input = sortedInputs.get(i);
        SnapshotLeafRecord actual = actuals.get(i);
        // TestNG order is (actual, expected), matching the size assertion above.
        Assert.assertEquals(actual.key, input.entry.key());
        Assert.assertEquals(actual.value, input.entry.value());
    }
}
Also used : ScanningSnapshotReader(io.datarouter.filesystem.snapshot.reader.ScanningSnapshotReader) SnapshotLeafRecord(io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord) BlockLoader(io.datarouter.filesystem.snapshot.reader.block.BlockLoader) Test(org.testng.annotations.Test)

Example 5 with SnapshotLeafRecord

use of io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord in project datarouter by hotpads.

the class LeafBlockV1Tests method testFindRecordId.

/**
 * Checks that every input key can be found in {@code BLOCK}, and that the leaf record at each
 * index carries the key and value of the input at the same index.
 */
@Test
public void testFindRecordId() {
    // NOTE(review): getBytes() and new String(byte[]) below use the platform default charset;
    // confirm how BLOCK was encoded before switching these to an explicit charset.
    INPUTS.forEach(input -> Assert.assertTrue(BLOCK.findRecordId(input.getLeft().getBytes()).isPresent()));
    IntStream.range(0, INPUTS.size()).forEach(i -> {
        SnapshotLeafRecord actual = BLOCK.snapshotLeafRecord(i);
        // TestNG order is (actual, expected)
        Assert.assertEquals(new String(actual.key), INPUTS.get(i).getLeft());
        Assert.assertEquals(new String(actual.value), INPUTS.get(i).getRight());
    });
}
Also used : SnapshotLeafRecord(io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord) Test(org.testng.annotations.Test)

Aggregations

SnapshotLeafRecord (io.datarouter.filesystem.snapshot.reader.record.SnapshotLeafRecord): 7; Test (org.testng.annotations.Test): 5; ScanningSnapshotReader (io.datarouter.filesystem.snapshot.reader.ScanningSnapshotReader): 3; SnapshotIdReader (io.datarouter.filesystem.snapshot.reader.SnapshotIdReader): 3; Scanner (io.datarouter.scanner.Scanner): 3; Arrays (java.util.Arrays): 3; List (java.util.List): 3; AtomicLong (java.util.concurrent.atomic.AtomicLong): 3; Logger (org.slf4j.Logger): 3; LoggerFactory (org.slf4j.LoggerFactory): 3; ByteTool (io.datarouter.bytes.ByteTool): 2; SnapshotEntry (io.datarouter.filesystem.snapshot.entry.SnapshotEntry): 2; SnapshotGroup (io.datarouter.filesystem.snapshot.group.SnapshotGroup): 2; SnapshotWriteResult (io.datarouter.filesystem.snapshot.group.dto.SnapshotWriteResult): 2; SnapshotKey (io.datarouter.filesystem.snapshot.key.SnapshotKey): 2; SnapshotKeyReader (io.datarouter.filesystem.snapshot.reader.SnapshotKeyReader): 2; BlockLoader (io.datarouter.filesystem.snapshot.reader.block.BlockLoader): 2; SnapshotRecord (io.datarouter.filesystem.snapshot.reader.record.SnapshotRecord): 2; SnapshotWriterConfig (io.datarouter.filesystem.snapshot.writer.SnapshotWriterConfig): 2; ParallelScannerContext (io.datarouter.scanner.ParallelScannerContext): 2