Search in sources :

Example 1 with DeltaTableOptimizeHandle

use of io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle in project trino by trinodb.

the class DeltaLakeMetadata method getTableHandleForOptimize.

/**
 * Builds the table-execute handle used to run the OPTIMIZE procedure on a table.
 *
 * @param tableHandle handle of the table being optimized; supplies the metadata entry,
 *        the schema table name and the table location
 * @param executeProperties procedure properties; the {@code "file_size_threshold"} entry
 *        is read and cast to {@link DataSize}
 * @return an always-present optional holding the OPTIMIZE execute handle
 */
private Optional<ConnectorTableExecuteHandle> getTableHandleForOptimize(DeltaLakeTableHandle tableHandle, Map<String, Object> executeProperties) {
    DataSize fileSizeThreshold = (DataSize) executeProperties.get("file_size_threshold");

    // Synthesized columns are left out of the column list handed to the optimize handle.
    List<DeltaLakeColumnHandle> tableColumns = getColumns(tableHandle.getMetadataEntry()).stream()
            .filter(handle -> handle.getColumnType() != SYNTHESIZED)
            .collect(toImmutableList());

    DeltaTableOptimizeHandle optimizeHandle = new DeltaTableOptimizeHandle(
            tableHandle.getMetadataEntry(),
            tableColumns,
            tableHandle.getMetadataEntry().getOriginalPartitionColumns(),
            fileSizeThreshold,
            Optional.empty());

    DeltaLakeTableExecuteHandle executeHandle = new DeltaLakeTableExecuteHandle(
            tableHandle.getSchemaTableName(),
            OPTIMIZE,
            optimizeHandle,
            tableHandle.getLocation());
    return Optional.of(executeHandle);
}
Also used : TransactionLogUtil.getTransactionLogDir(io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir) FileSystem(org.apache.hadoop.fs.FileSystem) TableSnapshot(io.trino.plugin.deltalake.transactionlog.TableSnapshot) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) FileStatus(org.apache.hadoop.fs.FileStatus) DeltaLakeSchemaSupport.validateType(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.validateType) TypeUtils.isFloatingPointNaN(io.trino.spi.type.TypeUtils.isFloatingPointNaN) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry) ConnectorTableExecuteHandle(io.trino.spi.connector.ConnectorTableExecuteHandle) Collections.singletonList(java.util.Collections.singletonList) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TransactionLogWriterFactory(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriterFactory) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) TimestampWithTimeZoneType(io.trino.spi.type.TimestampWithTimeZoneType) ValueSet.ofRanges(io.trino.spi.predicate.ValueSet.ofRanges) Column(io.trino.plugin.hive.metastore.Column) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) PARTITIONED_BY_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.PARTITIONED_BY_PROPERTY) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) PRESTO_QUERY_ID_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) ENGLISH(java.util.Locale.ENGLISH) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) Table(io.trino.plugin.hive.metastore.Table) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) 
TABLE_PROVIDER_PROPERTY(io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_PROPERTY) HiveWriteUtils.pathExists(io.trino.plugin.hive.util.HiveWriteUtils.pathExists) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) SchemaTableName(io.trino.spi.connector.SchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) HyperLogLog(io.airlift.stats.cardinality.HyperLogLog) DateTimeEncoding.unpackMillisUtc(io.trino.spi.type.DateTimeEncoding.unpackMillisUtc) FILE_MODIFIED_TIME_COLUMN_NAME(io.trino.plugin.deltalake.DeltaLakeColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME) Predicate.not(java.util.function.Predicate.not) TableColumnsMetadata(io.trino.spi.connector.TableColumnsMetadata) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) ANALYZE_COLUMNS_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.ANALYZE_COLUMNS_PROPERTY) REGULAR(io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR) TransactionLogParser.getMandatoryCurrentVersion(io.trino.plugin.deltalake.transactionlog.TransactionLogParser.getMandatoryCurrentVersion) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) Iterables(com.google.common.collect.Iterables) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) DeltaLakeColumnHandle.fileSizeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.fileSizeColumnHandle) Slice(io.airlift.slice.Slice) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) DeltaLakeTableProcedureId(io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId) INVALID_ANALYZE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) 
BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Variable(io.trino.spi.expression.Variable) DeltaLakeTableProperties.getLocation(io.trino.plugin.deltalake.DeltaLakeTableProperties.getLocation) Range.greaterThanOrEqual(io.trino.spi.predicate.Range.greaterThanOrEqual) TransactionConflictException(io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException) HiveType(io.trino.plugin.hive.HiveType) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) DeltaLakeStatisticsAccess(io.trino.plugin.deltalake.statistics.DeltaLakeStatisticsAccess) DeltaLakeSchemaSupport.extractPartitionColumns(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractPartitionColumns) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) INVALID_TABLE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) DeltaLakeSchemaSupport.serializeStatsAsJson(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeStatsAsJson) Nullable(javax.annotation.Nullable) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) MapType(io.trino.spi.type.MapType) PARTITION_KEY(io.trino.plugin.deltalake.DeltaLakeColumnType.PARTITION_KEY) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) DELTA_LAKE_INVALID_SCHEMA(io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA) CheckpointWriterManager(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointWriterManager) ROW_ID_COLUMN_TYPE(io.trino.plugin.deltalake.DeltaLakeColumnHandle.ROW_ID_COLUMN_TYPE) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HiveUtil.isHiveSystemSchema(io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) 
MAX_VALUE(io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE) DeltaLakeSessionProperties.isTableStatisticsEnabled(io.trino.plugin.deltalake.DeltaLakeSessionProperties.isTableStatisticsEnabled) LOCATION_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.LOCATION_PROPERTY) TableStatisticsMetadata(io.trino.spi.statistics.TableStatisticsMetadata) TINYINT(io.trino.spi.type.TinyintType.TINYINT) NotADeltaLakeTableException(io.trino.plugin.deltalake.metastore.NotADeltaLakeTableException) DeltaLakeStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeStatistics) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) NodeManager(io.trino.spi.NodeManager) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) Database(io.trino.plugin.hive.metastore.Database) DeltaLakeSchemaSupport.extractSchema(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema) SYNTHESIZED(io.trino.plugin.deltalake.DeltaLakeColumnType.SYNTHESIZED) TABLE_PROVIDER_VALUE(io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_VALUE) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) DeltaLakeMetastore(io.trino.plugin.deltalake.metastore.DeltaLakeMetastore) Format(io.trino.plugin.deltalake.transactionlog.MetadataEntry.Format) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Locale(java.util.Locale) CatalogSchemaName(io.trino.spi.connector.CatalogSchemaName) Path(org.apache.hadoop.fs.Path) HyperLogLogType(io.trino.spi.type.HyperLogLogType) INTEGER(io.trino.spi.type.IntegerType.INTEGER) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) RowType(io.trino.spi.type.RowType) Range.range(io.trino.spi.predicate.Range.range) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) 
HiveWriteUtils.isS3FileSystem(io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem) TransactionLogWriter(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriter) Collection(java.util.Collection) DeltaLakeTableExecuteHandle(io.trino.plugin.deltalake.procedure.DeltaLakeTableExecuteHandle) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) Instant(java.time.Instant) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) Sets(com.google.common.collect.Sets) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ROW_ID_COLUMN_NAME(io.trino.plugin.deltalake.DeltaLakeColumnHandle.ROW_ID_COLUMN_NAME) INVALID_SCHEMA_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) MetastoreUtil.buildInitialPrivilegeSet(io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet) Assignment(io.trino.spi.connector.Assignment) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) DecimalType(io.trino.spi.type.DecimalType) OPTIMIZE(io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId.OPTIMIZE) JsonCodec(io.airlift.json.JsonCodec) Comparators(com.google.common.collect.Comparators) Constraint(io.trino.spi.connector.Constraint) Range.lessThanOrEqual(io.trino.spi.predicate.Range.lessThanOrEqual) DeltaLakeFileStatistics(io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics) Logger(io.airlift.log.Logger) 
DeltaLakeSchemaSupport.serializeSchemaAsJson(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeSchemaAsJson) DeltaLakeColumnStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeColumnStatistics) Type(io.trino.spi.type.Type) HashMap(java.util.HashMap) DeltaLakeColumnHandle.pathColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.pathColumnHandle) DeltaLakeColumnHandle.fileModifiedTimeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.fileModifiedTimeColumnHandle) AtomicReference(java.util.concurrent.atomic.AtomicReference) VarcharType(io.trino.spi.type.VarcharType) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeSessionProperties.isExtendedStatisticsEnabled(io.trino.plugin.deltalake.DeltaLakeSessionProperties.isExtendedStatisticsEnabled) VIRTUAL_VIEW(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW) CHECKPOINT_INTERVAL_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.CHECKPOINT_INTERVAL_PROPERTY) StorageFormat.create(io.trino.plugin.hive.metastore.StorageFormat.create) MetadataEntry.buildDeltaMetadataConfiguration(io.trino.plugin.deltalake.transactionlog.MetadataEntry.buildDeltaMetadataConfiguration) TupleDomain.withColumnDomains(io.trino.spi.predicate.TupleDomain.withColumnDomains) DELTA_LAKE_BAD_WRITE(io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_BAD_WRITE) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) TupleDomain(io.trino.spi.predicate.TupleDomain) DeltaLakeTableProperties.getPartitionedBy(io.trino.plugin.deltalake.DeltaLakeTableProperties.getPartitionedBy) HiveWriteUtils.createDirectory(io.trino.plugin.hive.util.HiveWriteUtils.createDirectory) GENERIC_INTERNAL_ERROR(io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) 
SchemaTableName.schemaTableName(io.trino.spi.connector.SchemaTableName.schemaTableName) UUID.randomUUID(java.util.UUID.randomUUID) ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) DeltaTableOptimizeHandle(io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle) Collections.unmodifiableMap(java.util.Collections.unmodifiableMap) CommitInfoEntry(io.trino.plugin.deltalake.transactionlog.CommitInfoEntry) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) TypeManager(io.trino.spi.type.TypeManager) Collections(java.util.Collections) NUMBER_OF_DISTINCT_VALUES_SUMMARY(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES_SUMMARY) DeltaTableOptimizeHandle(io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle) DeltaLakeTableExecuteHandle(io.trino.plugin.deltalake.procedure.DeltaLakeTableExecuteHandle) DataSize(io.airlift.units.DataSize)

Example 2 with DeltaTableOptimizeHandle

use of io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle in project trino by trinodb.

the class DeltaLakeMetadata method beginOptimize.

/**
 * Starts an OPTIMIZE execution: verifies that the table can be written to, then pairs an
 * execute handle carrying the table's read version with a table handle set up for optimize.
 *
 * @param session the current connector session
 * @param executeHandle execute handle whose procedure handle is a {@link DeltaTableOptimizeHandle}
 * @param table handle of the table being optimized
 * @return the updated execute handle together with the table handle produced by {@code forOptimize}
 * @throws TrinoException with {@code NOT_SUPPORTED} when {@code allowWrite} rejects the table
 */
private BeginTableExecuteResult<ConnectorTableExecuteHandle, ConnectorTableHandle> beginOptimize(ConnectorSession session, DeltaLakeTableExecuteHandle executeHandle, DeltaLakeTableHandle table) {
    DeltaTableOptimizeHandle procedureHandle = (DeltaTableOptimizeHandle) executeHandle.getProcedureHandle();

    boolean writable = allowWrite(session, table);
    if (!writable) {
        // Report the URI scheme of the table location in the error message.
        String scheme = new Path(table.getLocation()).toUri().getScheme();
        String message = format("Optimize is not supported on the %s filesystem", scheme);
        throw new TrinoException(NOT_SUPPORTED, message);
    }
    checkSupportedWriterVersion(session, table.getSchemaTableName());

    // Record the version that was read so it is available when the procedure finishes.
    ConnectorTableExecuteHandle versionedExecuteHandle = executeHandle.withProcedureHandle(
            procedureHandle.withCurrentVersion(table.getReadVersion()));
    ConnectorTableHandle optimizeSource = table.forOptimize(true, procedureHandle.getMaxScannedFileSize());
    return new BeginTableExecuteResult<>(versionedExecuteHandle, optimizeSource);
}
Also used : DeltaTableOptimizeHandle(io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle) Path(org.apache.hadoop.fs.Path) TrinoException(io.trino.spi.TrinoException) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult)

Example 3 with DeltaTableOptimizeHandle

use of io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle in project trino by trinodb.

the class DeltaLakeMetadata method finishOptimize.

/**
 * Commits the result of an OPTIMIZE run to the table's transaction log.
 *
 * <p>One commit is written at {@code readVersion + 1}. It contains a commit-info entry, a
 * remove entry for every scanned file, and add entries for the files described by
 * {@code fragments}. After the writer is flushed, a checkpoint is written if needed.
 *
 * @param session the current connector session
 * @param executeHandle execute handle whose procedure handle is a {@link DeltaTableOptimizeHandle}
 * @param fragments JSON-encoded {@code DataFileInfo} payloads for the newly written files
 * @param splitSourceInfo scanned file paths; every element is cast to {@link String}
 * @throws IllegalArgumentException if the optimize handle carries no current version
 * @throws TrinoException with {@code DELTA_LAKE_BAD_WRITE} if anything inside the commit
 *         section fails, including the checkpoint step that runs after the flush
 */
private void finishOptimize(ConnectorSession session, DeltaLakeTableExecuteHandle executeHandle, Collection<Slice> fragments, List<Object> splitSourceInfo) {
    DeltaTableOptimizeHandle procedureHandle = (DeltaTableOptimizeHandle) executeHandle.getProcedureHandle();
    long baseVersion = procedureHandle.getCurrentVersion()
            .orElseThrow(() -> new IllegalArgumentException("currentVersion not set"));
    Optional<Long> checkpointEvery = procedureHandle.getMetadataEntry().getCheckpointInterval();
    String location = executeHandle.getTableLocation();

    // Files that were scanned; each gets a remove entry in the commit.
    Set<Path> pathsToRemove = splitSourceInfo.stream()
            .map(info -> new Path((String) info))
            .collect(toImmutableSet());

    // Newly written files; each gets an add entry in the commit.
    List<DataFileInfo> newFiles = fragments.stream()
            .map(Slice::getBytes)
            .map(dataFileInfoCodec::fromJson)
            .collect(toImmutableList());

    boolean committed = false;
    try {
        TransactionLogWriter logWriter = transactionLogWriterFactory.newWriter(session, location);
        long commitTimestamp = Instant.now().toEpochMilli();
        long newVersion = baseVersion + 1;
        CommitInfoEntry commitInfo = new CommitInfoEntry(
                newVersion,
                commitTimestamp,
                session.getUser(),
                session.getUser(),
                OPTIMIZE_OPERATION,
                ImmutableMap.of("queryId", session.getQueryId()),
                null,
                null,
                "trino-" + nodeVersion + "-" + nodeId,
                baseVersion,
                ISOLATION_LEVEL,
                true);
        logWriter.appendCommitInfoEntry(commitInfo);

        // TODO: Delta also writes an "operationMetrics" field holding delete/update metrics.
        // Investigate whether it should be included here.
        long removalTimestamp = Instant.now().toEpochMilli();
        for (Path pathToRemove : pathsToRemove) {
            // Remove entries store the path relative to the table location.
            String relative = new Path(location).toUri().relativize(pathToRemove.toUri()).toString();
            RemoveFileEntry removeEntry = new RemoveFileEntry(relative, removalTimestamp, false);
            logWriter.appendRemoveFileEntry(removeEntry);
        }

        // Writes must keep the original case of the partition column names.
        List<String> originalPartitionColumns = procedureHandle.getMetadataEntry().getOriginalPartitionColumns();
        appendAddFileEntries(logWriter, newFiles, originalPartitionColumns, false);

        logWriter.flush();
        committed = true;
        writeCheckpointIfNeeded(session, executeHandle.getSchemaTableName(), checkpointEvery, newVersion);
    }
    catch (Exception e) {
        // Only clean up the new files when the flush did not complete.
        if (!committed) {
            // TODO perhaps it should happen in a background thread
            cleanupFailedWrite(session, location, newFiles);
        }
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Failed to write Delta Lake transaction log entry", e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TransactionLogUtil.getTransactionLogDir(io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir) FileSystem(org.apache.hadoop.fs.FileSystem) TableSnapshot(io.trino.plugin.deltalake.transactionlog.TableSnapshot) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) FileStatus(org.apache.hadoop.fs.FileStatus) DeltaLakeSchemaSupport.validateType(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.validateType) TypeUtils.isFloatingPointNaN(io.trino.spi.type.TypeUtils.isFloatingPointNaN) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry) ConnectorTableExecuteHandle(io.trino.spi.connector.ConnectorTableExecuteHandle) Collections.singletonList(java.util.Collections.singletonList) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TransactionLogWriterFactory(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriterFactory) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) TimestampWithTimeZoneType(io.trino.spi.type.TimestampWithTimeZoneType) ValueSet.ofRanges(io.trino.spi.predicate.ValueSet.ofRanges) Column(io.trino.plugin.hive.metastore.Column) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) PARTITIONED_BY_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.PARTITIONED_BY_PROPERTY) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) PRESTO_QUERY_ID_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) ENGLISH(java.util.Locale.ENGLISH) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) Table(io.trino.plugin.hive.metastore.Table) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) 
TABLE_PROVIDER_PROPERTY(io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_PROPERTY) HiveWriteUtils.pathExists(io.trino.plugin.hive.util.HiveWriteUtils.pathExists) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) SchemaTableName(io.trino.spi.connector.SchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) HyperLogLog(io.airlift.stats.cardinality.HyperLogLog) DateTimeEncoding.unpackMillisUtc(io.trino.spi.type.DateTimeEncoding.unpackMillisUtc) FILE_MODIFIED_TIME_COLUMN_NAME(io.trino.plugin.deltalake.DeltaLakeColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME) Predicate.not(java.util.function.Predicate.not) TableColumnsMetadata(io.trino.spi.connector.TableColumnsMetadata) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) ANALYZE_COLUMNS_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.ANALYZE_COLUMNS_PROPERTY) REGULAR(io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR) TransactionLogParser.getMandatoryCurrentVersion(io.trino.plugin.deltalake.transactionlog.TransactionLogParser.getMandatoryCurrentVersion) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) Iterables(com.google.common.collect.Iterables) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) DeltaLakeColumnHandle.fileSizeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.fileSizeColumnHandle) Slice(io.airlift.slice.Slice) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) DeltaLakeTableProcedureId(io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId) INVALID_ANALYZE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) 
BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Variable(io.trino.spi.expression.Variable) DeltaLakeTableProperties.getLocation(io.trino.plugin.deltalake.DeltaLakeTableProperties.getLocation) Range.greaterThanOrEqual(io.trino.spi.predicate.Range.greaterThanOrEqual) TransactionConflictException(io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException) HiveType(io.trino.plugin.hive.HiveType) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) DeltaLakeStatisticsAccess(io.trino.plugin.deltalake.statistics.DeltaLakeStatisticsAccess) DeltaLakeSchemaSupport.extractPartitionColumns(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractPartitionColumns) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) INVALID_TABLE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) DeltaLakeSchemaSupport.serializeStatsAsJson(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeStatsAsJson) Nullable(javax.annotation.Nullable) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) MapType(io.trino.spi.type.MapType) PARTITION_KEY(io.trino.plugin.deltalake.DeltaLakeColumnType.PARTITION_KEY) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) DELTA_LAKE_INVALID_SCHEMA(io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA) CheckpointWriterManager(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointWriterManager) ROW_ID_COLUMN_TYPE(io.trino.plugin.deltalake.DeltaLakeColumnHandle.ROW_ID_COLUMN_TYPE) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HiveUtil.isHiveSystemSchema(io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) 
MAX_VALUE(io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE) DeltaLakeSessionProperties.isTableStatisticsEnabled(io.trino.plugin.deltalake.DeltaLakeSessionProperties.isTableStatisticsEnabled) LOCATION_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.LOCATION_PROPERTY) TableStatisticsMetadata(io.trino.spi.statistics.TableStatisticsMetadata) TINYINT(io.trino.spi.type.TinyintType.TINYINT) NotADeltaLakeTableException(io.trino.plugin.deltalake.metastore.NotADeltaLakeTableException) DeltaLakeStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeStatistics) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) NodeManager(io.trino.spi.NodeManager) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) Database(io.trino.plugin.hive.metastore.Database) DeltaLakeSchemaSupport.extractSchema(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema) SYNTHESIZED(io.trino.plugin.deltalake.DeltaLakeColumnType.SYNTHESIZED) TABLE_PROVIDER_VALUE(io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_VALUE) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) DeltaLakeMetastore(io.trino.plugin.deltalake.metastore.DeltaLakeMetastore) Format(io.trino.plugin.deltalake.transactionlog.MetadataEntry.Format) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Locale(java.util.Locale) CatalogSchemaName(io.trino.spi.connector.CatalogSchemaName) Path(org.apache.hadoop.fs.Path) HyperLogLogType(io.trino.spi.type.HyperLogLogType) INTEGER(io.trino.spi.type.IntegerType.INTEGER) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) RowType(io.trino.spi.type.RowType) Range.range(io.trino.spi.predicate.Range.range) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) 
HiveWriteUtils.isS3FileSystem(io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem) TransactionLogWriter(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriter) Collection(java.util.Collection) DeltaLakeTableExecuteHandle(io.trino.plugin.deltalake.procedure.DeltaLakeTableExecuteHandle) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) Instant(java.time.Instant) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) Sets(com.google.common.collect.Sets) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ROW_ID_COLUMN_NAME(io.trino.plugin.deltalake.DeltaLakeColumnHandle.ROW_ID_COLUMN_NAME) INVALID_SCHEMA_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) MetastoreUtil.buildInitialPrivilegeSet(io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet) Assignment(io.trino.spi.connector.Assignment) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) DecimalType(io.trino.spi.type.DecimalType) OPTIMIZE(io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId.OPTIMIZE) JsonCodec(io.airlift.json.JsonCodec) Comparators(com.google.common.collect.Comparators) Constraint(io.trino.spi.connector.Constraint) Range.lessThanOrEqual(io.trino.spi.predicate.Range.lessThanOrEqual) DeltaLakeFileStatistics(io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics) Logger(io.airlift.log.Logger) 
DeltaLakeSchemaSupport.serializeSchemaAsJson(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeSchemaAsJson) DeltaLakeColumnStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeColumnStatistics) Type(io.trino.spi.type.Type) HashMap(java.util.HashMap) DeltaLakeColumnHandle.pathColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.pathColumnHandle) DeltaLakeColumnHandle.fileModifiedTimeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.fileModifiedTimeColumnHandle) AtomicReference(java.util.concurrent.atomic.AtomicReference) VarcharType(io.trino.spi.type.VarcharType) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeSessionProperties.isExtendedStatisticsEnabled(io.trino.plugin.deltalake.DeltaLakeSessionProperties.isExtendedStatisticsEnabled) VIRTUAL_VIEW(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW) CHECKPOINT_INTERVAL_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.CHECKPOINT_INTERVAL_PROPERTY) StorageFormat.create(io.trino.plugin.hive.metastore.StorageFormat.create) MetadataEntry.buildDeltaMetadataConfiguration(io.trino.plugin.deltalake.transactionlog.MetadataEntry.buildDeltaMetadataConfiguration) TupleDomain.withColumnDomains(io.trino.spi.predicate.TupleDomain.withColumnDomains) DELTA_LAKE_BAD_WRITE(io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_BAD_WRITE) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) TupleDomain(io.trino.spi.predicate.TupleDomain) DeltaLakeTableProperties.getPartitionedBy(io.trino.plugin.deltalake.DeltaLakeTableProperties.getPartitionedBy) HiveWriteUtils.createDirectory(io.trino.plugin.hive.util.HiveWriteUtils.createDirectory) GENERIC_INTERNAL_ERROR(io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) 
SchemaTableName.schemaTableName(io.trino.spi.connector.SchemaTableName.schemaTableName) UUID.randomUUID(java.util.UUID.randomUUID) ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) DeltaTableOptimizeHandle(io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle) Collections.unmodifiableMap(java.util.Collections.unmodifiableMap) CommitInfoEntry(io.trino.plugin.deltalake.transactionlog.CommitInfoEntry) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) TypeManager(io.trino.spi.type.TypeManager) Collections(java.util.Collections) NUMBER_OF_DISTINCT_VALUES_SUMMARY(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES_SUMMARY) CommitInfoEntry(io.trino.plugin.deltalake.transactionlog.CommitInfoEntry) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) TransactionConflictException(io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException) IOException(java.io.IOException) NotADeltaLakeTableException(io.trino.plugin.deltalake.metastore.NotADeltaLakeTableException) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) TrinoException(io.trino.spi.TrinoException) FileNotFoundException(java.io.FileNotFoundException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) DeltaTableOptimizeHandle(io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle) Slice(io.airlift.slice.Slice) TrinoException(io.trino.spi.TrinoException) TransactionLogWriter(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriter) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry)

Example 4 with DeltaTableOptimizeHandle

use of io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle in project trino by trinodb.

the class DeltaLakeMetadata method getLayoutForOptimize.

/**
 * Derives the table layout for an optimize execution from the table's partition columns.
 *
 * <p>The procedure handle carried by {@code executeHandle} is cast to
 * {@link DeltaTableOptimizeHandle}. When its metadata entry reports no canonical partition
 * columns, no layout is produced. Otherwise each canonical partition column name is resolved
 * against the handle's table columns (keyed by their English-lower-cased names) and the
 * resulting handles, in partition-column order, form the partitioning handle of the layout.
 *
 * @param executeHandle the table execute handle whose procedure handle is a {@link DeltaTableOptimizeHandle}
 * @return a layout partitioned by the table's partition columns, or {@link Optional#empty()}
 *         when the table has no partition columns
 */
private Optional<ConnectorTableLayout> getLayoutForOptimize(DeltaLakeTableExecuteHandle executeHandle) {
    DeltaTableOptimizeHandle handle = (DeltaTableOptimizeHandle) executeHandle.getProcedureHandle();
    List<String> partitionNames = handle.getMetadataEntry().getCanonicalPartitionColumns();
    if (!partitionNames.isEmpty()) {
        // Index the table columns by lower-cased name so they can be looked up by the canonical partition names.
        Map<String, DeltaLakeColumnHandle> handlesByLowerCaseName = handle.getTableColumns().stream()
                .collect(toImmutableMap(column -> column.getName().toLowerCase(Locale.ENGLISH), identity()));

        // Resolve partition names in their declared order; a name with no matching column
        // yields null here, which the builder rejects.
        ImmutableList.Builder<DeltaLakeColumnHandle> resolved = ImmutableList.builder();
        partitionNames.forEach(name -> resolved.add(handlesByLowerCaseName.get(name)));

        return Optional.of(new ConnectorTableLayout(new DeltaLakePartitioningHandle(resolved.build()), partitionNames));
    }
    return Optional.empty();
}
Also used : DeltaTableOptimizeHandle(io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle) TransactionLogUtil.getTransactionLogDir(io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir) FileSystem(org.apache.hadoop.fs.FileSystem) TableSnapshot(io.trino.plugin.deltalake.transactionlog.TableSnapshot) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) FileStatus(org.apache.hadoop.fs.FileStatus) DeltaLakeSchemaSupport.validateType(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.validateType) TypeUtils.isFloatingPointNaN(io.trino.spi.type.TypeUtils.isFloatingPointNaN) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry) ConnectorTableExecuteHandle(io.trino.spi.connector.ConnectorTableExecuteHandle) Collections.singletonList(java.util.Collections.singletonList) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TransactionLogWriterFactory(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriterFactory) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) TimestampWithTimeZoneType(io.trino.spi.type.TimestampWithTimeZoneType) ValueSet.ofRanges(io.trino.spi.predicate.ValueSet.ofRanges) Column(io.trino.plugin.hive.metastore.Column) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) PARTITIONED_BY_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.PARTITIONED_BY_PROPERTY) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) PRESTO_QUERY_ID_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) ENGLISH(java.util.Locale.ENGLISH) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) Table(io.trino.plugin.hive.metastore.Table) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) 
Set(java.util.Set) TABLE_PROVIDER_PROPERTY(io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_PROPERTY) HiveWriteUtils.pathExists(io.trino.plugin.hive.util.HiveWriteUtils.pathExists) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) SchemaTableName(io.trino.spi.connector.SchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) HyperLogLog(io.airlift.stats.cardinality.HyperLogLog) DateTimeEncoding.unpackMillisUtc(io.trino.spi.type.DateTimeEncoding.unpackMillisUtc) FILE_MODIFIED_TIME_COLUMN_NAME(io.trino.plugin.deltalake.DeltaLakeColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME) Predicate.not(java.util.function.Predicate.not) TableColumnsMetadata(io.trino.spi.connector.TableColumnsMetadata) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) ANALYZE_COLUMNS_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.ANALYZE_COLUMNS_PROPERTY) REGULAR(io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR) TransactionLogParser.getMandatoryCurrentVersion(io.trino.plugin.deltalake.transactionlog.TransactionLogParser.getMandatoryCurrentVersion) DATE(io.trino.spi.type.DateType.DATE) REAL(io.trino.spi.type.RealType.REAL) Iterables(com.google.common.collect.Iterables) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) DeltaLakeColumnHandle.fileSizeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.fileSizeColumnHandle) Slice(io.airlift.slice.Slice) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) DeltaLakeTableProcedureId(io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId) 
INVALID_ANALYZE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Variable(io.trino.spi.expression.Variable) DeltaLakeTableProperties.getLocation(io.trino.plugin.deltalake.DeltaLakeTableProperties.getLocation) Range.greaterThanOrEqual(io.trino.spi.predicate.Range.greaterThanOrEqual) TransactionConflictException(io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException) HiveType(io.trino.plugin.hive.HiveType) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) DeltaLakeStatisticsAccess(io.trino.plugin.deltalake.statistics.DeltaLakeStatisticsAccess) DeltaLakeSchemaSupport.extractPartitionColumns(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractPartitionColumns) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) INVALID_TABLE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) DeltaLakeSchemaSupport.serializeStatsAsJson(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeStatsAsJson) Nullable(javax.annotation.Nullable) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) MapType(io.trino.spi.type.MapType) PARTITION_KEY(io.trino.plugin.deltalake.DeltaLakeColumnType.PARTITION_KEY) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) DELTA_LAKE_INVALID_SCHEMA(io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA) CheckpointWriterManager(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointWriterManager) ROW_ID_COLUMN_TYPE(io.trino.plugin.deltalake.DeltaLakeColumnHandle.ROW_ID_COLUMN_TYPE) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HiveUtil.isHiveSystemSchema(io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) 
ConnectorExpression(io.trino.spi.expression.ConnectorExpression) MAX_VALUE(io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE) DeltaLakeSessionProperties.isTableStatisticsEnabled(io.trino.plugin.deltalake.DeltaLakeSessionProperties.isTableStatisticsEnabled) LOCATION_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.LOCATION_PROPERTY) TableStatisticsMetadata(io.trino.spi.statistics.TableStatisticsMetadata) TINYINT(io.trino.spi.type.TinyintType.TINYINT) NotADeltaLakeTableException(io.trino.plugin.deltalake.metastore.NotADeltaLakeTableException) DeltaLakeStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeStatistics) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) NodeManager(io.trino.spi.NodeManager) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) Database(io.trino.plugin.hive.metastore.Database) DeltaLakeSchemaSupport.extractSchema(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema) SYNTHESIZED(io.trino.plugin.deltalake.DeltaLakeColumnType.SYNTHESIZED) TABLE_PROVIDER_VALUE(io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_VALUE) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) DeltaLakeMetastore(io.trino.plugin.deltalake.metastore.DeltaLakeMetastore) Format(io.trino.plugin.deltalake.transactionlog.MetadataEntry.Format) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Locale(java.util.Locale) CatalogSchemaName(io.trino.spi.connector.CatalogSchemaName) Path(org.apache.hadoop.fs.Path) HyperLogLogType(io.trino.spi.type.HyperLogLogType) INTEGER(io.trino.spi.type.IntegerType.INTEGER) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) RowType(io.trino.spi.type.RowType) Range.range(io.trino.spi.predicate.Range.range) ImmutableSet(com.google.common.collect.ImmutableSet) 
ImmutableMap(com.google.common.collect.ImmutableMap) HiveWriteUtils.isS3FileSystem(io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem) TransactionLogWriter(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriter) Collection(java.util.Collection) DeltaLakeTableExecuteHandle(io.trino.plugin.deltalake.procedure.DeltaLakeTableExecuteHandle) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) Instant(java.time.Instant) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) Sets(com.google.common.collect.Sets) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ROW_ID_COLUMN_NAME(io.trino.plugin.deltalake.DeltaLakeColumnHandle.ROW_ID_COLUMN_NAME) INVALID_SCHEMA_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) MetastoreUtil.buildInitialPrivilegeSet(io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet) Assignment(io.trino.spi.connector.Assignment) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) DecimalType(io.trino.spi.type.DecimalType) OPTIMIZE(io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId.OPTIMIZE) JsonCodec(io.airlift.json.JsonCodec) Comparators(com.google.common.collect.Comparators) Constraint(io.trino.spi.connector.Constraint) Range.lessThanOrEqual(io.trino.spi.predicate.Range.lessThanOrEqual) 
DeltaLakeFileStatistics(io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics) Logger(io.airlift.log.Logger) DeltaLakeSchemaSupport.serializeSchemaAsJson(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.serializeSchemaAsJson) DeltaLakeColumnStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeColumnStatistics) Type(io.trino.spi.type.Type) HashMap(java.util.HashMap) DeltaLakeColumnHandle.pathColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.pathColumnHandle) DeltaLakeColumnHandle.fileModifiedTimeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle.fileModifiedTimeColumnHandle) AtomicReference(java.util.concurrent.atomic.AtomicReference) VarcharType(io.trino.spi.type.VarcharType) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeSessionProperties.isExtendedStatisticsEnabled(io.trino.plugin.deltalake.DeltaLakeSessionProperties.isExtendedStatisticsEnabled) VIRTUAL_VIEW(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW) CHECKPOINT_INTERVAL_PROPERTY(io.trino.plugin.deltalake.DeltaLakeTableProperties.CHECKPOINT_INTERVAL_PROPERTY) StorageFormat.create(io.trino.plugin.hive.metastore.StorageFormat.create) MetadataEntry.buildDeltaMetadataConfiguration(io.trino.plugin.deltalake.transactionlog.MetadataEntry.buildDeltaMetadataConfiguration) TupleDomain.withColumnDomains(io.trino.spi.predicate.TupleDomain.withColumnDomains) DELTA_LAKE_BAD_WRITE(io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_BAD_WRITE) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) TupleDomain(io.trino.spi.predicate.TupleDomain) DeltaLakeTableProperties.getPartitionedBy(io.trino.plugin.deltalake.DeltaLakeTableProperties.getPartitionedBy) HiveWriteUtils.createDirectory(io.trino.plugin.hive.util.HiveWriteUtils.createDirectory) 
GENERIC_INTERNAL_ERROR(io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) SchemaTableName.schemaTableName(io.trino.spi.connector.SchemaTableName.schemaTableName) UUID.randomUUID(java.util.UUID.randomUUID) ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) DeltaTableOptimizeHandle(io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle) Collections.unmodifiableMap(java.util.Collections.unmodifiableMap) CommitInfoEntry(io.trino.plugin.deltalake.transactionlog.CommitInfoEntry) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) TypeManager(io.trino.spi.type.TypeManager) Collections(java.util.Collections) NUMBER_OF_DISTINCT_VALUES_SUMMARY(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES_SUMMARY) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList)

Aggregations

DeltaTableOptimizeHandle (io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle)4 TrinoException (io.trino.spi.TrinoException)4 BeginTableExecuteResult (io.trino.spi.connector.BeginTableExecuteResult)4 Path (org.apache.hadoop.fs.Path)4 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)3 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)3 Preconditions.checkState (com.google.common.base.Preconditions.checkState)3 Verify.verify (com.google.common.base.Verify.verify)3 Comparators (com.google.common.collect.Comparators)3 ImmutableList (com.google.common.collect.ImmutableList)3 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)3 ImmutableMap (com.google.common.collect.ImmutableMap)3 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)3 ImmutableSet (com.google.common.collect.ImmutableSet)3 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)3 Iterables (com.google.common.collect.Iterables)3 Sets (com.google.common.collect.Sets)3 JsonCodec (io.airlift.json.JsonCodec)3 Logger (io.airlift.log.Logger)3 Slice (io.airlift.slice.Slice)3