Search in sources :

Example 11 with ShardInfo

use of io.trino.plugin.raptor.legacy.metadata.ShardInfo in project trino by trinodb.

From the class TestShardEjector, method testEjector:

/**
 * Runs {@code ShardEjector.process()} against a cluster of five nodes and checks that the first
 * four shards registered on node1 are removed from node1 (metadata and storage file) and end up
 * on node2..node5, while the next four shards stay on node1 with their storage files intact.
 */
@Test(invocationCount = 20)
public void testEjector() throws Exception {
    NodeManager nodeManager = createNodeManager("node1", "node2", "node3", "node4", "node5");
    ShardEjector ejector = new ShardEjector(
            nodeManager.getCurrentNode().getNodeIdentifier(),
            nodeManager::getWorkerNodes,
            shardManager,
            storageService,
            new Duration(1, HOURS),
            Optional.of(new TestingBackupStore()),
            "test");

    // The numeric argument is presumably the shard size - confirm against shardInfo().
    // Note that "node6" is not one of the nodes handed to createNodeManager above.
    List<ShardInfo> shards = ImmutableList.of(
            shardInfo("node1", 14),
            shardInfo("node1", 13),
            shardInfo("node1", 12),
            shardInfo("node1", 11),
            shardInfo("node1", 10),
            shardInfo("node1", 10),
            shardInfo("node1", 10),
            shardInfo("node1", 10),
            shardInfo("node2", 5),
            shardInfo("node2", 5),
            shardInfo("node3", 10),
            shardInfo("node4", 10),
            shardInfo("node5", 10),
            shardInfo("node6", 200));

    long tableId = createTable("test");
    List<ColumnInfo> columns = ImmutableList.of(new ColumnInfo(1, BIGINT));
    shardManager.createTable(tableId, columns, false, OptionalLong.empty());

    long transactionId = shardManager.beginTransaction();
    shardManager.commitShards(transactionId, tableId, columns, shards, Optional.empty(), 0);

    // Only the eight node1 shards get an actual (empty) storage file on disk.
    for (ShardInfo localShard : shards.subList(0, 8)) {
        File storageFile = storageService.getStorageFile(localShard.getShardUuid());
        storageService.createParents(storageFile);
        assertTrue(storageFile.createNewFile());
    }

    ejector.process();

    // Result is intentionally unused; the call is retained exactly as in the original test.
    shardManager.getShardNodes(tableId, TupleDomain.all());

    Set<UUID> ejectedShards = shards.subList(0, 4).stream()
            .map(ShardInfo::getShardUuid)
            .collect(toSet());
    Set<UUID> keptShards = shards.subList(4, 8).stream()
            .map(ShardInfo::getShardUuid)
            .collect(toSet());
    Set<UUID> remaining = uuids(shardManager.getNodeShards("node1"));

    // Ejected shards must be gone from node1, both in metadata and on disk.
    for (UUID ejected : ejectedShards) {
        assertFalse(remaining.contains(ejected));
        assertFalse(storageService.getStorageFile(ejected).exists());
    }

    // node1 must hold exactly the kept shards, each still backed by its file.
    assertEquals(remaining, keptShards);
    for (UUID kept : keptShards) {
        assertTrue(storageService.getStorageFile(kept).exists());
    }

    // Every ejected shard must now be assigned to one of the other four nodes.
    ImmutableSet.Builder<UUID> othersBuilder = ImmutableSet.builder();
    for (String otherNode : ImmutableList.of("node2", "node3", "node4", "node5")) {
        othersBuilder.addAll(uuids(shardManager.getNodeShards(otherNode)));
    }
    Set<UUID> others = othersBuilder.build();
    assertTrue(others.containsAll(ejectedShards));
}
Also used : ColumnInfo(io.trino.plugin.raptor.legacy.metadata.ColumnInfo) Duration(io.airlift.units.Duration) NodeManager(io.trino.spi.NodeManager) TestingNodeManager(io.trino.testing.TestingNodeManager) UUID(java.util.UUID) UUID.randomUUID(java.util.UUID.randomUUID) File(java.io.File) ShardInfo(io.trino.plugin.raptor.legacy.metadata.ShardInfo) Test(org.testng.annotations.Test)

Example 12 with ShardInfo

use of io.trino.plugin.raptor.legacy.metadata.ShardInfo in project trino by trinodb.

From the class RaptorStorageManager, method rewriteShard:

/**
 * Rewrites the storage file of {@code shardUuid} into a new staging file via
 * {@code rewriteFile(input, output, rowsToDelete)} and returns the resulting shard delta.
 *
 * @param transactionId transaction under which the newly created shard is recorded
 * @param bucketNumber bucket number passed through to the new shard's {@code ShardInfo}
 * @param shardUuid UUID of the existing shard being rewritten
 * @param rowsToDelete rows to delete; when empty, nothing is rewritten
 * @return an empty list when {@code rowsToDelete} is empty; a delta with no new shard when the
 *         rewritten file reports zero rows; otherwise a delta replacing the old shard with the
 *         newly written one
 */
@VisibleForTesting
Collection<Slice> rewriteShard(long transactionId, OptionalInt bucketNumber, UUID shardUuid, BitSet rowsToDelete) {
    if (rowsToDelete.isEmpty()) {
        return ImmutableList.of();
    }

    UUID replacementUuid = UUID.randomUUID();
    File sourceFile = storageService.getStorageFile(shardUuid);
    File stagingFile = storageService.getStagingFile(replacementUuid);

    OrcFileInfo rewritten = rewriteFile(sourceFile, stagingFile, rowsToDelete);
    long remainingRows = rewritten.getRowCount();
    if (remainingRows == 0) {
        // The rewritten file reports no rows: emit a delta without a replacement shard.
        return shardDelta(shardUuid, Optional.empty());
    }

    shardRecorder.recordCreatedShard(transactionId, replacementUuid);

    // submit for backup and wait until it finishes
    getFutureValue(backupManager.submit(replacementUuid, stagingFile));

    // The shard info is built from the staging file before writeShard() is invoked.
    ShardInfo replacement = createShardInfo(
            replacementUuid,
            bucketNumber,
            stagingFile,
            ImmutableSet.of(nodeId),
            remainingRows,
            rewritten.getUncompressedSize());
    writeShard(replacementUuid);

    return shardDelta(shardUuid, Optional.of(replacement));
}
Also used : OrcFileInfo(io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo) UUID(java.util.UUID) File(java.io.File) ShardInfo(io.trino.plugin.raptor.legacy.metadata.ShardInfo) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 13 with ShardInfo

use of io.trino.plugin.raptor.legacy.metadata.ShardInfo in project trino by trinodb.

From the class RaptorMetadata, method finishDelete:

/**
 * Completes a delete: decodes every fragment into a shard delta, gathers the old shard UUIDs and
 * the new shards across all deltas, hands them to {@code shardManager.replaceShardUuids} and
 * finally calls {@code clearRollback()}.
 *
 * @param session session whose start time is used as the update time
 * @param tableHandle handle of the table being deleted from; cast to {@code RaptorTableHandle}
 * @param fragments JSON-encoded shard deltas, decoded with {@code SHARD_DELTA_CODEC}
 */
@Override
public void finishDelete(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<Slice> fragments) {
    RaptorTableHandle table = (RaptorTableHandle) tableHandle;
    long transactionId = table.getTransactionId().getAsLong();
    long tableId = table.getTableId();

    List<ColumnInfo> columns = getColumnHandles(session, tableHandle).values().stream()
            .map(handle -> ColumnInfo.fromHandle((RaptorColumnHandle) handle))
            .collect(toList());

    // Merge the old/new shard sets of every delta reported by the fragments.
    ImmutableSet.Builder<UUID> replacedUuids = ImmutableSet.builder();
    ImmutableList.Builder<ShardInfo> rewrittenShards = ImmutableList.builder();
    for (Slice fragment : fragments) {
        ShardDelta delta = SHARD_DELTA_CODEC.fromJson(fragment.getBytes());
        replacedUuids.addAll(delta.getOldShardUuids());
        rewrittenShards.addAll(delta.getNewShards());
    }
    Set<UUID> oldShardUuids = replacedUuids.build();
    List<ShardInfo> newShards = rewrittenShards.build();

    OptionalLong updateTime = OptionalLong.of(session.getStart().toEpochMilli());

    // Old shards without a replacement are reported as removed.
    int removedCount = oldShardUuids.size() - newShards.size();
    log.info("Finishing delete for tableId %s (removed: %s, rewritten: %s)", tableId, removedCount, newShards.size());

    shardManager.replaceShardUuids(transactionId, tableId, columns, oldShardUuids, newShards, updateTime);
    clearRollback();
}
Also used : ORGANIZED_PROPERTY(io.trino.plugin.raptor.legacy.RaptorTableProperties.ORGANIZED_PROPERTY) SHARD_UUID_COLUMN_TYPE(io.trino.plugin.raptor.legacy.RaptorColumnHandle.SHARD_UUID_COLUMN_TYPE) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) ObjectMapperProvider(io.airlift.json.ObjectMapperProvider) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) RaptorTableProperties.isOrganized(io.trino.plugin.raptor.legacy.RaptorTableProperties.isOrganized) ShardDelta(io.trino.plugin.raptor.legacy.metadata.ShardDelta) RaptorColumnHandle.shardUuidColumnHandle(io.trino.plugin.raptor.legacy.RaptorColumnHandle.shardUuidColumnHandle) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) RaptorSessionProperties.getExternalBatchId(io.trino.plugin.raptor.legacy.RaptorSessionProperties.getExternalBatchId) RAPTOR_ERROR(io.trino.plugin.raptor.legacy.RaptorErrorCode.RAPTOR_ERROR) ViewNotFoundException(io.trino.spi.connector.ViewNotFoundException) Set(java.util.Set) DatabaseUtil.onDemandDao(io.trino.plugin.raptor.legacy.util.DatabaseUtil.onDemandDao) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) ImmutableListMultimap(com.google.common.collect.ImmutableListMultimap) ConnectorTablePartitioning(io.trino.spi.connector.ConnectorTablePartitioning) DATE(io.trino.spi.type.DateType.DATE) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) Slice(io.airlift.slice.Slice) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) TIMESTAMP_MILLIS(io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) 
BUCKETED_ON_PROPERTY(io.trino.plugin.raptor.legacy.RaptorTableProperties.BUCKETED_ON_PROPERTY) Multimaps(com.google.common.collect.Multimaps) ArrayList(java.util.ArrayList) OptionalLong(java.util.OptionalLong) ColumnRangesSystemTable(io.trino.plugin.raptor.legacy.systemtables.ColumnRangesSystemTable) RaptorTableProperties.getSortColumns(io.trino.plugin.raptor.legacy.RaptorTableProperties.getSortColumns) ColumnHandle(io.trino.spi.connector.ColumnHandle) ORDERING_PROPERTY(io.trino.plugin.raptor.legacy.RaptorTableProperties.ORDERING_PROPERTY) INVALID_TABLE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) Jdbi(org.jdbi.v3.core.Jdbi) SHARD_UUID_COLUMN_NAME(io.trino.plugin.raptor.legacy.RaptorColumnHandle.SHARD_UUID_COLUMN_NAME) DatabaseUtil.runTransaction(io.trino.plugin.raptor.legacy.util.DatabaseUtil.runTransaction) MoreCollectors.toOptional(com.google.common.collect.MoreCollectors.toOptional) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) DatabaseUtil.daoTransaction(io.trino.plugin.raptor.legacy.util.DatabaseUtil.daoTransaction) TableColumn(io.trino.plugin.raptor.legacy.metadata.TableColumn) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) ConnectorSession(io.trino.spi.connector.ConnectorSession) Table(io.trino.plugin.raptor.legacy.metadata.Table) JsonCodec.jsonCodec(io.airlift.json.JsonCodec.jsonCodec) ShardInfo(io.trino.plugin.raptor.legacy.metadata.ShardInfo) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) TreeMap(java.util.TreeMap) ShardManager(io.trino.plugin.raptor.legacy.metadata.ShardManager) RaptorColumnHandle.bucketNumberColumnHandle(io.trino.plugin.raptor.legacy.RaptorColumnHandle.bucketNumberColumnHandle) ColumnInfo(io.trino.plugin.raptor.legacy.metadata.ColumnInfo) ViewResult(io.trino.plugin.raptor.legacy.metadata.ViewResult) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) 
RaptorTableProperties.getBucketColumns(io.trino.plugin.raptor.legacy.RaptorTableProperties.getBucketColumns) RaptorSessionProperties.getOneSplitPerBucketThreshold(io.trino.plugin.raptor.legacy.RaptorSessionProperties.getOneSplitPerBucketThreshold) ALREADY_EXISTS(io.trino.spi.StandardErrorCode.ALREADY_EXISTS) ConnectorViewDefinition(io.trino.spi.connector.ConnectorViewDefinition) INTEGER(io.trino.spi.type.IntegerType.INTEGER) MetadataDao(io.trino.plugin.raptor.legacy.metadata.MetadataDao) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.nCopies(java.util.Collections.nCopies) Collection(java.util.Collection) ColumnRangesSystemTable.getSourceTable(io.trino.plugin.raptor.legacy.systemtables.ColumnRangesSystemTable.getSourceTable) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) TrinoException(io.trino.spi.TrinoException) UUID(java.util.UUID) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) ASC_NULLS_FIRST(io.trino.spi.connector.SortOrder.ASC_NULLS_FIRST) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) DISTRIBUTION_NAME_PROPERTY(io.trino.plugin.raptor.legacy.RaptorTableProperties.DISTRIBUTION_NAME_PROPERTY) List(java.util.List) BUCKET_COUNT_PROPERTY(io.trino.plugin.raptor.legacy.RaptorTableProperties.BUCKET_COUNT_PROPERTY) JsonCodecFactory(io.airlift.json.JsonCodecFactory) Optional(java.util.Optional) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) SystemTable(io.trino.spi.connector.SystemTable) SortedMap(java.util.SortedMap) JsonCodec(io.airlift.json.JsonCodec) RaptorColumnHandle.isHiddenColumn(io.trino.plugin.raptor.legacy.RaptorColumnHandle.isHiddenColumn) RaptorColumnHandle.shardRowIdHandle(io.trino.plugin.raptor.legacy.RaptorColumnHandle.shardRowIdHandle) Constraint(io.trino.spi.connector.Constraint) Logger(io.airlift.log.Logger) Type(io.trino.spi.type.Type) 
OptionalInt(java.util.OptionalInt) AtomicReference(java.util.concurrent.atomic.AtomicReference) Collectors.toCollection(java.util.stream.Collectors.toCollection) Distribution(io.trino.plugin.raptor.legacy.metadata.Distribution) RaptorTableProperties.getDistributionName(io.trino.plugin.raptor.legacy.RaptorTableProperties.getDistributionName) ImmutableList(com.google.common.collect.ImmutableList) BUCKET_NUMBER_COLUMN_NAME(io.trino.plugin.raptor.legacy.RaptorColumnHandle.BUCKET_NUMBER_COLUMN_NAME) Objects.requireNonNull(java.util.Objects.requireNonNull) NOT_FOUND(io.trino.spi.StandardErrorCode.NOT_FOUND) RaptorTableProperties.getTemporalColumn(io.trino.plugin.raptor.legacy.RaptorTableProperties.getTemporalColumn) TupleDomain(io.trino.spi.predicate.TupleDomain) Maps(com.google.common.collect.Maps) LongConsumer(java.util.function.LongConsumer) TEMPORAL_COLUMN_PROPERTY(io.trino.plugin.raptor.legacy.RaptorTableProperties.TEMPORAL_COLUMN_PROPERTY) RaptorBucketFunction.validateBucketType(io.trino.plugin.raptor.legacy.RaptorBucketFunction.validateBucketType) Collectors.toList(java.util.stream.Collectors.toList) DatabaseUtil.runIgnoringConstraintViolation(io.trino.plugin.raptor.legacy.util.DatabaseUtil.runIgnoringConstraintViolation) RaptorTableProperties.getBucketCount(io.trino.plugin.raptor.legacy.RaptorTableProperties.getBucketCount) ImmutableList(com.google.common.collect.ImmutableList) ColumnInfo(io.trino.plugin.raptor.legacy.metadata.ColumnInfo) ImmutableSet(com.google.common.collect.ImmutableSet) OptionalLong(java.util.OptionalLong) UUID(java.util.UUID) ShardInfo(io.trino.plugin.raptor.legacy.metadata.ShardInfo)

Aggregations

ShardInfo (io.trino.plugin.raptor.legacy.metadata.ShardInfo)13 Type (io.trino.spi.type.Type)7 UUID (java.util.UUID)7 Test (org.testng.annotations.Test)6 VarcharType.createVarcharType (io.trino.spi.type.VarcharType.createVarcharType)4 File (java.io.File)4 OptionalLong (java.util.OptionalLong)4 Slice (io.airlift.slice.Slice)3 ColumnInfo (io.trino.plugin.raptor.legacy.metadata.ColumnInfo)3 ShardDelta (io.trino.plugin.raptor.legacy.metadata.ShardDelta)3 TableColumn (io.trino.plugin.raptor.legacy.metadata.TableColumn)3 Page (io.trino.spi.Page)3 ImmutableList (com.google.common.collect.ImmutableList)2 Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice)2 ColumnStats (io.trino.plugin.raptor.legacy.metadata.ColumnStats)2 ShardManager (io.trino.plugin.raptor.legacy.metadata.ShardManager)2 Table (io.trino.plugin.raptor.legacy.metadata.Table)2 StoragePageSink (io.trino.plugin.raptor.legacy.storage.StoragePageSink)2 SchemaTableName (io.trino.spi.connector.SchemaTableName)2 TupleDomain (io.trino.spi.predicate.TupleDomain)2