
Example 1 with ShardInfo

Use of io.trino.plugin.raptor.legacy.metadata.ShardInfo in project trino by trinodb.

From class RaptorStorageManager, method shardDelta:

private static Collection<Slice> shardDelta(UUID oldShardUuid, Optional<ShardInfo> shardInfo) {
    // The delta pairs the shard being replaced with its replacement, if any
    List<ShardInfo> newShards = shardInfo.map(ImmutableList::of).orElse(ImmutableList.of());
    ShardDelta delta = new ShardDelta(ImmutableList.of(oldShardUuid), newShards);
    // Serialize the delta as JSON, wrapped in a Slice
    return ImmutableList.of(Slices.wrappedBuffer(SHARD_DELTA_CODEC.toJsonBytes(delta)));
}
Also used: ShardDelta (io.trino.plugin.raptor.legacy.metadata.ShardDelta), ShardInfo (io.trino.plugin.raptor.legacy.metadata.ShardInfo)
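
For context, these delta fragments are round-trippable: the same JSON codec that encodes a ShardDelta can decode it back. A minimal sketch of the inverse operation (this helper does not exist in Trino; io.airlift.json.JsonCodec does provide fromJson, and Slice provides getBytes):

private static ShardDelta decodeShardDelta(Slice fragment) {
    // Invert shardDelta(): read the JSON bytes back into a ShardDelta
    return SHARD_DELTA_CODEC.fromJson(fragment.getBytes());
}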

Example 2 with ShardInfo

Use of io.trino.plugin.raptor.legacy.metadata.ShardInfo in project trino by trinodb.

From class OrganizationJob, method runJob:

private void runJob(long transactionId, OptionalInt bucketNumber, long tableId, Set<UUID> shardUuids) throws IOException {
    TableMetadata metadata = getTableMetadata(tableId);
    // Rewrite the input shards into new, organized shards
    List<ShardInfo> newShards = performCompaction(transactionId, bucketNumber, shardUuids, metadata);
    log.info("Compacted shards %s into %s", shardUuids, newShards.stream().map(ShardInfo::getShardUuid).collect(toList()));
    // Swap the old shard UUIDs for the new shards in the shard metadata
    shardManager.replaceShardUuids(transactionId, tableId, metadata.getColumns(), shardUuids, newShards, OptionalLong.empty());
}
Also used: TableMetadata (io.trino.plugin.raptor.legacy.metadata.TableMetadata), ShardInfo (io.trino.plugin.raptor.legacy.metadata.ShardInfo)
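
The replaceShardUuids call is what makes compaction safe: new shards are written first, and the old UUID set is swapped for the new one in a single metadata transaction, so readers never observe a half-compacted table. A toy in-memory model of that invariant (entirely hypothetical; Trino's real implementation lives in the shard database, not in memory):

import java.util.HashSet;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicReference;

final class ShardSetSwap {
    // The live shard set for a table, replaced atomically on compaction
    private final AtomicReference<Set<UUID>> liveShards;

    ShardSetSwap(Set<UUID> initialShards) {
        this.liveShards = new AtomicReference<>(Set.copyOf(initialShards));
    }

    Set<UUID> currentShards() {
        return liveShards.get();
    }

    void replace(Set<UUID> oldShards, Set<UUID> newShards) {
        liveShards.updateAndGet(current -> {
            // A concurrent job may have replaced these shards already
            if (!current.containsAll(oldShards)) {
                throw new IllegalStateException("shards already replaced");
            }
            Set<UUID> updated = new HashSet<>(current);
            updated.removeAll(oldShards);
            updated.addAll(newShards);
            // Publish the whole new set in one atomic step
            return Set.copyOf(updated);
        });
    }
}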

Example 3 with ShardInfo

Use of io.trino.plugin.raptor.legacy.metadata.ShardInfo in project trino by trinodb.

From class ShardCompactor, method compact:

public List<ShardInfo> compact(long transactionId, OptionalInt bucketNumber, Set<UUID> uuids, List<ColumnInfo> columns) throws IOException {
    long start = System.nanoTime();
    List<Long> columnIds = columns.stream().map(ColumnInfo::getColumnId).collect(toList());
    List<Type> columnTypes = columns.stream().map(ColumnInfo::getType).collect(toList());
    StoragePageSink storagePageSink = storageManager.createStoragePageSink(transactionId, bucketNumber, columnIds, columnTypes, false);
    List<ShardInfo> shardInfos;
    try {
        shardInfos = compact(storagePageSink, bucketNumber, uuids, columnIds, columnTypes);
    } catch (IOException | RuntimeException e) {
        // Discard any partially written shards before propagating the failure
        storagePageSink.rollback();
        throw e;
    }
    updateStats(uuids.size(), shardInfos.size(), nanosSince(start).toMillis());
    return shardInfos;
}
Also used: Type (io.trino.spi.type.Type), StoragePageSink (io.trino.plugin.raptor.legacy.storage.StoragePageSink), IOException (java.io.IOException), ShardInfo (io.trino.plugin.raptor.legacy.metadata.ShardInfo)
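
The try/catch around the write is a pattern worth naming: any failure must roll the sink back so half-written shard files are cleaned up. In generic form (a hypothetical helper, not part of Trino):

import java.util.concurrent.Callable;

final class Sinks {
    interface Rollbackable {
        void rollback();
    }

    // Run a write against a sink, rolling back on any failure
    static <T> T writeOrRollback(Rollbackable sink, Callable<T> write) throws Exception {
        try {
            return write.call();
        }
        catch (Exception e) {
            // Clean up partially written state, then surface the original failure
            sink.rollback();
            throw e;
        }
    }

    private Sinks() {}
}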

Example 4 with ShardInfo

Use of io.trino.plugin.raptor.legacy.metadata.ShardInfo in project trino by trinodb.

From class ShardCompactor, method compactSorted:

public List<ShardInfo> compactSorted(long transactionId, OptionalInt bucketNumber, Set<UUID> uuids, List<ColumnInfo> columns, List<Long> sortColumnIds, List<SortOrder> sortOrders) throws IOException {
    checkArgument(sortColumnIds.size() == sortOrders.size(), "sortColumnIds and sortOrders must be of the same size");
    long start = System.nanoTime();
    List<Long> columnIds = columns.stream().map(ColumnInfo::getColumnId).collect(toList());
    List<Type> columnTypes = columns.stream().map(ColumnInfo::getType).collect(toList());
    checkArgument(columnIds.containsAll(sortColumnIds), "sortColumnIds must be a subset of columnIds");
    List<Integer> sortIndexes = sortColumnIds.stream().map(columnIds::indexOf).collect(toList());
    Queue<SortedRowSource> rowSources = new PriorityQueue<>();
    StoragePageSink outputPageSink = storageManager.createStoragePageSink(transactionId, bucketNumber, columnIds, columnTypes, false);
    try {
        for (UUID uuid : uuids) {
            ConnectorPageSource pageSource = storageManager.getPageSource(uuid, bucketNumber, columnIds, columnTypes, TupleDomain.all(), orcReaderOptions);
            SortedRowSource rowSource = new SortedRowSource(pageSource, columnTypes, sortIndexes, sortOrders, typeOperators);
            rowSources.add(rowSource);
        }
        // k-way merge: the queue orders sources by their current row, so
        // polling always yields the source whose next row sorts first
        while (!rowSources.isEmpty()) {
            SortedRowSource rowSource = rowSources.poll();
            if (!rowSource.hasNext()) {
                // rowSource is exhausted, close it
                rowSource.close();
                continue;
            }
            outputPageSink.appendRow(rowSource.next());
            if (outputPageSink.isFull()) {
                outputPageSink.flush();
            }
            // re-add the source so the queue repositions it by its next row
            rowSources.add(rowSource);
        }
        outputPageSink.flush();
        List<ShardInfo> shardInfos = getFutureValue(outputPageSink.commit());
        updateStats(uuids.size(), shardInfos.size(), nanosSince(start).toMillis());
        return shardInfos;
    } catch (IOException | RuntimeException e) {
        // Abandon partially written output before rethrowing
        outputPageSink.rollback();
        throw e;
    } finally {
        rowSources.forEach(SortedRowSource::closeQuietly);
    }
}
Also used: StoragePageSink (io.trino.plugin.raptor.legacy.storage.StoragePageSink), IOException (java.io.IOException), PriorityQueue (java.util.PriorityQueue), ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource), Type (io.trino.spi.type.Type), UUID (java.util.UUID), ShardInfo (io.trino.plugin.raptor.legacy.metadata.ShardInfo)
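
The loop above is a classic k-way merge: since the PriorityQueue is built without a comparator, SortedRowSource must implement Comparable, and the queue always surfaces the source whose current row sorts first. A self-contained sketch of the same algorithm over plain iterators (all names hypothetical; Trino compares rows with SortOrder and TypeOperators rather than natural ordering):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;

final class KWayMerge {
    // Merge already-sorted sources into one sorted list, mirroring the
    // poll / consume / re-add loop in compactSorted above
    static <T extends Comparable<T>> List<T> merge(List<Iterator<T>> sources) {
        PriorityQueue<Cursor<T>> queue = new PriorityQueue<>();
        for (Iterator<T> source : sources) {
            if (source.hasNext()) {
                queue.add(new Cursor<>(source));
            }
        }
        List<T> merged = new ArrayList<>();
        while (!queue.isEmpty()) {
            Cursor<T> cursor = queue.poll();
            merged.add(cursor.current);
            if (cursor.advance()) {
                // re-insert so the cursor is reordered by its next element
                queue.add(cursor);
            }
        }
        return merged;
    }

    private static final class Cursor<T extends Comparable<T>> implements Comparable<Cursor<T>> {
        private final Iterator<T> source;
        private T current;

        Cursor(Iterator<T> source) {
            this.source = source;
            this.current = source.next();
        }

        boolean advance() {
            if (!source.hasNext()) {
                return false;
            }
            current = source.next();
            return true;
        }

        @Override
        public int compareTo(Cursor<T> other) {
            return current.compareTo(other.current);
        }
    }
}

Merging the iterators of [1, 3, 5] and [2, 4], for instance, yields [1, 2, 3, 4, 5]. Note that a cursor's current element only changes while the cursor is outside the queue, which preserves the heap invariant; the poll-then-re-add discipline in compactSorted serves the same purpose.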

Example 5 with ShardInfo

Use of io.trino.plugin.raptor.legacy.metadata.ShardInfo in project trino by trinodb.

From class TestRaptorStorageManager, method testWriter:

@Test
public void testWriter() throws Exception {
    RaptorStorageManager manager = createRaptorStorageManager();
    List<Long> columnIds = ImmutableList.of(3L, 7L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10));
    StoragePageSink sink = createStoragePageSink(manager, columnIds, columnTypes);
    List<Page> pages = rowPagesBuilder(columnTypes).row(123L, "hello").row(456L, "bye").build();
    sink.appendPages(pages);
    // shard is not recorded until flush
    assertEquals(shardRecorder.getShards().size(), 0);
    sink.flush();
    // shard is recorded after flush
    List<RecordedShard> recordedShards = shardRecorder.getShards();
    assertEquals(recordedShards.size(), 1);
    List<ShardInfo> shards = getFutureValue(sink.commit());
    assertEquals(shards.size(), 1);
    ShardInfo shardInfo = Iterables.getOnlyElement(shards);
    UUID shardUuid = shardInfo.getShardUuid();
    File file = storageService.getStorageFile(shardUuid);
    File backupFile = fileBackupStore.getBackupFile(shardUuid);
    assertEquals(recordedShards.get(0).getTransactionId(), TRANSACTION_ID);
    assertEquals(recordedShards.get(0).getShardUuid(), shardUuid);
    assertEquals(shardInfo.getRowCount(), 2);
    assertEquals(shardInfo.getCompressedSize(), file.length());
    assertEquals(shardInfo.getXxhash64(), xxhash64(file));
    // verify primary and backup shard exist
    assertFile(file, "primary shard");
    assertFile(backupFile, "backup shard");
    assertFileEquals(file, backupFile);
    // remove primary shard to force recovery from backup
    assertTrue(file.delete());
    assertTrue(file.getParentFile().delete());
    assertFalse(file.exists());
    recoveryManager.restoreFromBackup(shardUuid, shardInfo.getCompressedSize(), OptionalLong.of(shardInfo.getXxhash64()));
    try (OrcDataSource dataSource = manager.openShard(shardUuid, READER_OPTIONS)) {
        OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
        Page page = reader.nextPage();
        assertEquals(page.getPositionCount(), 2);
        Block column0 = page.getBlock(0);
        assertEquals(column0.isNull(0), false);
        assertEquals(column0.isNull(1), false);
        assertEquals(BIGINT.getLong(column0, 0), 123L);
        assertEquals(BIGINT.getLong(column0, 1), 456L);
        Block column1 = page.getBlock(1);
        assertEquals(createVarcharType(10).getSlice(column1, 0), utf8Slice("hello"));
        assertEquals(createVarcharType(10).getSlice(column1, 1), utf8Slice("bye"));
        assertNull(reader.nextPage());
    }
}
Also used: OrcDataSource (io.trino.orc.OrcDataSource), Page (io.trino.spi.Page), RecordedShard (io.trino.plugin.raptor.legacy.storage.InMemoryShardRecorder.RecordedShard), OrcRecordReader (io.trino.orc.OrcRecordReader), Type (io.trino.spi.type.Type), VarcharType.createVarcharType (io.trino.spi.type.VarcharType.createVarcharType), OptionalLong (java.util.OptionalLong), Block (io.trino.spi.block.Block), UUID (java.util.UUID), FileAssert.assertFile (org.testng.FileAssert.assertFile), File (java.io.File), ShardInfo (io.trino.plugin.raptor.legacy.metadata.ShardInfo), Test (org.testng.annotations.Test)
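
The write path in this test follows the StoragePageSink lifecycle: appendPages buffers rows, flush cuts a shard (the point at which the shard recorder sees it), and commit finalizes the shards and returns their ShardInfo. As a minimal sketch (the helper itself is hypothetical; the calls are exactly the ones exercised above):

static List<ShardInfo> writePages(StoragePageSink sink, List<Page> pages) {
    sink.appendPages(pages); // buffer rows; no shard is recorded yet
    sink.flush(); // cut a shard; the shard recorder now sees it
    return getFutureValue(sink.commit()); // finalize and return shard metadata
}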

Aggregations

ShardInfo (io.trino.plugin.raptor.legacy.metadata.ShardInfo): 13 usages
Type (io.trino.spi.type.Type): 7 usages
UUID (java.util.UUID): 7 usages
Test (org.testng.annotations.Test): 6 usages
VarcharType.createVarcharType (io.trino.spi.type.VarcharType.createVarcharType): 4 usages
File (java.io.File): 4 usages
OptionalLong (java.util.OptionalLong): 4 usages
Slice (io.airlift.slice.Slice): 3 usages
ColumnInfo (io.trino.plugin.raptor.legacy.metadata.ColumnInfo): 3 usages
ShardDelta (io.trino.plugin.raptor.legacy.metadata.ShardDelta): 3 usages
TableColumn (io.trino.plugin.raptor.legacy.metadata.TableColumn): 3 usages
Page (io.trino.spi.Page): 3 usages
ImmutableList (com.google.common.collect.ImmutableList): 2 usages
Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice): 2 usages
ColumnStats (io.trino.plugin.raptor.legacy.metadata.ColumnStats): 2 usages
ShardManager (io.trino.plugin.raptor.legacy.metadata.ShardManager): 2 usages
Table (io.trino.plugin.raptor.legacy.metadata.Table): 2 usages
StoragePageSink (io.trino.plugin.raptor.legacy.storage.StoragePageSink): 2 usages
SchemaTableName (io.trino.spi.connector.SchemaTableName): 2 usages
TupleDomain (io.trino.spi.predicate.TupleDomain): 2 usages