
Example 6 with ConnectorTableLayout

Use of io.trino.spi.connector.ConnectorTableLayout in project trino by trinodb.

The class DeltaLakeMetadata, method getInsertLayout.

@Override
public Optional<ConnectorTableLayout> getInsertLayout(ConnectorSession session, ConnectorTableHandle tableHandle) {
    DeltaLakeTableHandle deltaLakeTableHandle = (DeltaLakeTableHandle) tableHandle;
    List<String> partitionColumnNames = deltaLakeTableHandle.getMetadataEntry().getCanonicalPartitionColumns();
    if (partitionColumnNames.isEmpty()) {
        // Unpartitioned table: no preferred insert layout
        return Optional.empty();
    }
    // Column names only (no partitioning handle): the engine decides whether to repartition
    return Optional.of(new ConnectorTableLayout(partitionColumnNames));
}
Also used: ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout)
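
Returning only column names, via the one-argument ConnectorTableLayout constructor, declares a preferred write layout without dictating the distribution: the engine may hash-partition on those columns or skip repartitioning entirely. A minimal caller-side sketch (metadata, session, and tableHandle are assumed to be in scope; the variable names are illustrative, not from the Delta Lake connector):

Optional<ConnectorTableLayout> insertLayout = metadata.getInsertLayout(session, tableHandle);
insertLayout.ifPresent(layout -> {
    // getPartitioning() is empty here, so the engine may pick any distribution
    List<String> preferredColumns = layout.getPartitionColumns();
});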

Example 7 with ConnectorTableLayout

Use of io.trino.spi.connector.ConnectorTableLayout in project trino by trinodb.

The class RaptorMetadata, method getNewTableLayout.

@Override
public Optional<ConnectorTableLayout> getNewTableLayout(ConnectorSession session, ConnectorTableMetadata metadata) {
    // Assign sequential column IDs to build the Raptor column handles
    ImmutableMap.Builder<String, RaptorColumnHandle> map = ImmutableMap.builder();
    long columnId = 1;
    for (ColumnMetadata column : metadata.getColumns()) {
        map.put(column.getName(), new RaptorColumnHandle(column.getName(), columnId, column.getType()));
        columnId++;
    }
    Optional<DistributionInfo> distribution = getOrCreateDistribution(map.buildOrThrow(), metadata.getProperties());
    if (distribution.isEmpty()) {
        return Optional.empty();
    }
    List<String> partitionColumns = distribution.get().getBucketColumns().stream()
            .map(RaptorColumnHandle::getColumnName)
            .collect(toList());
    ConnectorPartitioningHandle partitioning = getPartitioningHandle(distribution.get().getDistributionId());
    // Partitioning handle included: the engine must honor this distribution when writing
    return Optional.of(new ConnectorTableLayout(partitioning, partitionColumns));
}
Also used: ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ImmutableMap(com.google.common.collect.ImmutableMap)
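
In contrast to the Delta Lake example above, Raptor uses the two-argument constructor, pairing the bucket columns with a ConnectorPartitioningHandle; the engine must then route rows according to that handle rather than choosing its own distribution. A side-by-side sketch of the two constructors ("ds", "country", and partitioningHandle are placeholders, not Raptor code):

// Preferred layout: the engine decides whether and how to repartition
ConnectorTableLayout preferred = new ConnectorTableLayout(ImmutableList.of("ds", "country"));
// Enforced layout: rows are routed by the connector's partitioning handle
ConnectorTableLayout enforced = new ConnectorTableLayout(partitioningHandle, ImmutableList.of("ds", "country"));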

Example 8 with ConnectorTableLayout

Use of io.trino.spi.connector.ConnectorTableLayout in project trino by trinodb.

The class RaptorMetadata, method beginCreateTable.

@Override
public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorTableLayout> layout) {
    if (viewExists(session, tableMetadata.getTable())) {
        throw new TrinoException(ALREADY_EXISTS, "View already exists: " + tableMetadata.getTable());
    }
    // Raptor layouts always carry a partitioning handle (see getNewTableLayout above),
    // so unwrapping with Optional::get cannot fail here
    Optional<RaptorPartitioningHandle> partitioning = layout
            .map(ConnectorTableLayout::getPartitioning)
            .map(Optional::get)
            .map(RaptorPartitioningHandle.class::cast);
    ImmutableList.Builder<RaptorColumnHandle> columnHandles = ImmutableList.builder();
    ImmutableList.Builder<Type> columnTypes = ImmutableList.builder();
    long columnId = 1;
    for (ColumnMetadata column : tableMetadata.getColumns()) {
        columnHandles.add(new RaptorColumnHandle(column.getName(), columnId, column.getType()));
        columnTypes.add(column.getType());
        columnId++;
    }
    Map<String, RaptorColumnHandle> columnHandleMap = Maps.uniqueIndex(columnHandles.build(), RaptorColumnHandle::getColumnName);
    List<RaptorColumnHandle> sortColumnHandles = getSortColumnHandles(getSortColumns(tableMetadata.getProperties()), columnHandleMap);
    Optional<RaptorColumnHandle> temporalColumnHandle = getTemporalColumnHandle(getTemporalColumn(tableMetadata.getProperties()), columnHandleMap);
    if (temporalColumnHandle.isPresent()) {
        RaptorColumnHandle column = temporalColumnHandle.get();
        if (!column.getColumnType().equals(TIMESTAMP_MILLIS) && !column.getColumnType().equals(DATE)) {
            throw new TrinoException(NOT_SUPPORTED, "Temporal column must be of type timestamp or date: " + column.getColumnName());
        }
    }
    boolean organized = isOrganized(tableMetadata.getProperties());
    if (organized) {
        if (temporalColumnHandle.isPresent()) {
            throw new TrinoException(NOT_SUPPORTED, "Table with temporal columns cannot be organized");
        }
        if (sortColumnHandles.isEmpty()) {
            throw new TrinoException(NOT_SUPPORTED, "Table organization requires an ordering");
        }
    }
    long transactionId = shardManager.beginTransaction();
    setTransactionId(transactionId);
    Optional<DistributionInfo> distribution = partitioning.map(handle ->
            getDistributionInfo(handle.getDistributionId(), columnHandleMap, tableMetadata.getProperties()));
    return new RaptorOutputTableHandle(
            transactionId,
            tableMetadata.getTable().getSchemaName(),
            tableMetadata.getTable().getTableName(),
            columnHandles.build(),
            columnTypes.build(),
            sortColumnHandles,
            nCopies(sortColumnHandles.size(), ASC_NULLS_FIRST),
            temporalColumnHandle,
            distribution.map(info -> OptionalLong.of(info.getDistributionId())).orElse(OptionalLong.empty()),
            distribution.map(info -> OptionalInt.of(info.getBucketCount())).orElse(OptionalInt.empty()),
            organized,
            distribution.map(DistributionInfo::getBucketColumns).orElse(ImmutableList.of()));
}
Also used: ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ImmutableList(com.google.common.collect.ImmutableList) Type(io.trino.spi.type.Type) RaptorBucketFunction.validateBucketType(io.trino.plugin.raptor.legacy.RaptorBucketFunction.validateBucketType) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) TrinoException(io.trino.spi.TrinoException)
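
The layout.map(ConnectorTableLayout::getPartitioning).map(Optional::get) chain would throw NoSuchElementException if a layout without a partitioning handle were ever passed in; Raptor can rely on that never happening because its getNewTableLayout only returns layouts with a handle. A flatMap-based sketch that degrades gracefully instead (an alternative, not the Raptor code):

Optional<RaptorPartitioningHandle> partitioning = layout
        .flatMap(ConnectorTableLayout::getPartitioning)
        .map(RaptorPartitioningHandle.class::cast);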

Example 9 with ConnectorTableLayout

Use of io.trino.spi.connector.ConnectorTableLayout in project trino by trinodb.

The class IcebergMetadata, method getWriteLayout.

private Optional<ConnectorTableLayout> getWriteLayout(Schema tableSchema, PartitionSpec partitionSpec, boolean forceRepartitioning) {
    if (partitionSpec.isUnpartitioned()) {
        return Optional.empty();
    }
    Map<Integer, IcebergColumnHandle> columnById = getColumns(tableSchema, typeManager).stream()
            .collect(toImmutableMap(IcebergColumnHandle::getId, identity()));
    List<IcebergColumnHandle> partitioningColumns = partitionSpec.fields().stream()
            .sorted(Comparator.comparing(PartitionField::sourceId))
            .map(field -> requireNonNull(columnById.get(field.sourceId()), () -> "Cannot find source column for partitioning field " + field))
            .distinct()
            .collect(toImmutableList());
    List<String> partitioningColumnNames = partitioningColumns.stream()
            .map(IcebergColumnHandle::getName)
            .collect(toImmutableList());
    if (!forceRepartitioning && partitionSpec.fields().stream().allMatch(field -> field.transform().isIdentity())) {
        // Do not set a partitioning handle; let the engine decide whether to repartition, based on statistics
        return Optional.of(new ConnectorTableLayout(partitioningColumnNames));
    }
    // Non-identity transforms (or forced repartitioning) require the connector's own partitioning
    IcebergPartitioningHandle partitioningHandle = new IcebergPartitioningHandle(toPartitionFields(partitionSpec), partitioningColumns);
    return Optional.of(new ConnectorTableLayout(partitioningHandle, partitioningColumnNames));
}
Also used: ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) Schema(org.apache.iceberg.Schema) PartitionSpec(org.apache.iceberg.PartitionSpec) PartitionField(org.apache.iceberg.PartitionField) PartitionFields.toPartitionFields(io.trino.plugin.iceberg.PartitionFields.toPartitionFields) IcebergUtil.getColumns(io.trino.plugin.iceberg.IcebergUtil.getColumns) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Function.identity(java.util.function.Function.identity) Objects.requireNonNull(java.util.Objects.requireNonNull) Comparator(java.util.Comparator) Optional(java.util.Optional)
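
The identity-transform check is what separates the two branches: identity partitioning (for example, by a raw date column) yields only a preferred layout, while hidden-partitioning transforms such as bucket or truncate force the connector's IcebergPartitioningHandle so rows land in the correct partitions. A hypothetical spec pair illustrating the split, built with the standard Iceberg API (the schema and column names are invented for illustration):

Schema schema = new Schema(
        Types.NestedField.required(1, "event_date", Types.DateType.get()),
        Types.NestedField.required(2, "user_id", Types.LongType.get()));
// Identity transform: getWriteLayout would return column names only
PartitionSpec identitySpec = PartitionSpec.builderFor(schema).identity("event_date").build();
// Bucket transform: getWriteLayout would also return an IcebergPartitioningHandle
PartitionSpec bucketSpec = PartitionSpec.builderFor(schema).bucket("user_id", 16).build();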

Example 10 with ConnectorTableLayout

Use of io.trino.spi.connector.ConnectorTableLayout in project trino by trinodb.

The class AbstractTestHive, method testPreferredCreateTableLayout.

@Test
public void testPreferredCreateTableLayout() {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        Optional<ConnectorTableLayout> newTableLayout = metadata.getNewTableLayout(session, new ConnectorTableMetadata(
                new SchemaTableName("schema", "table"),
                ImmutableList.of(new ColumnMetadata("column1", BIGINT), new ColumnMetadata("column2", BIGINT)),
                ImmutableMap.of(
                        PARTITIONED_BY_PROPERTY, ImmutableList.of("column2"),
                        BUCKETED_BY_PROPERTY, ImmutableList.of(),
                        BUCKET_COUNT_PROPERTY, 0,
                        SORTED_BY_PROPERTY, ImmutableList.of())));
        assertTrue(newTableLayout.isPresent());
        // Partitioned-only table: the layout names the partition columns but carries no partitioning handle
        assertFalse(newTableLayout.get().getPartitioning().isPresent());
        assertEquals(newTableLayout.get().getPartitionColumns(), ImmutableList.of("column2"));
    }
}
Also used: ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Test(org.testng.annotations.Test)
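
This test asserts the "preferred" half of the contract: a partitioned-but-unbucketed Hive table yields partition column names without a partitioning handle. A sketch of the complementary assertion for a bucketed table, assuming the same test scaffolding (not taken from AbstractTestHive):

Optional<ConnectorTableLayout> bucketedLayout = metadata.getNewTableLayout(session, new ConnectorTableMetadata(
        new SchemaTableName("schema", "table"),
        ImmutableList.of(new ColumnMetadata("column1", BIGINT)),
        ImmutableMap.of(
                PARTITIONED_BY_PROPERTY, ImmutableList.of(),
                BUCKETED_BY_PROPERTY, ImmutableList.of("column1"),
                BUCKET_COUNT_PROPERTY, 11,
                SORTED_BY_PROPERTY, ImmutableList.of())));
// Bucketed tables would be expected to pin the distribution with a partitioning handle
assertTrue(bucketedLayout.isPresent());
assertTrue(bucketedLayout.get().getPartitioning().isPresent());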

Aggregations

ConnectorTableLayout (io.trino.spi.connector.ConnectorTableLayout) 18
ColumnMetadata (io.trino.spi.connector.ColumnMetadata) 11
CatalogSchemaTableName (io.trino.spi.connector.CatalogSchemaTableName) 10
ConnectorSession (io.trino.spi.connector.ConnectorSession) 10
SchemaTableName (io.trino.spi.connector.SchemaTableName) 10
ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle) 8
ImmutableList (com.google.common.collect.ImmutableList) 7
ConnectorPartitioningHandle (io.trino.spi.connector.ConnectorPartitioningHandle) 7
ImmutableMap (com.google.common.collect.ImmutableMap) 6
Column (io.trino.plugin.hive.metastore.Column) 6
ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata) 6
ConnectorTableMetadata (io.trino.spi.connector.ConnectorTableMetadata) 6
Test (org.testng.annotations.Test) 6
ImmutableSet (com.google.common.collect.ImmutableSet) 5
HiveColumnHandle.createBaseColumn (io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) 5
SortingColumn (io.trino.plugin.hive.metastore.SortingColumn) 5
TrinoException (io.trino.spi.TrinoException) 5
ConnectorOutputTableHandle (io.trino.spi.connector.ConnectorOutputTableHandle) 5
TestingConnectorSession (io.trino.testing.TestingConnectorSession) 5
Verify.verify (com.google.common.base.Verify.verify) 4