Example 1 with IcebergTableExecuteHandle

Use of io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle in the trino project by trinodb.

From the class IcebergMetadata, method getTableHandleForOptimize:

private Optional<ConnectorTableExecuteHandle> getTableHandleForOptimize(ConnectorSession session, IcebergTableHandle tableHandle, Map<String, Object> executeProperties, RetryMode retryMode) {
    DataSize maxScannedFileSize = (DataSize) executeProperties.get("file_size_threshold");
    Table icebergTable = catalog.loadTable(session, tableHandle.getSchemaTableName());
    // The schema and partition spec are serialized to JSON so the handle stays serializable.
    return Optional.of(new IcebergTableExecuteHandle(
            tableHandle.getSchemaTableName(),
            OPTIMIZE,
            new IcebergOptimizeHandle(
                    SchemaParser.toJson(icebergTable.schema()), PartitionSpecParser.toJson(icebergTable.spec()),
                    getColumns(icebergTable.schema(), typeManager), getFileFormat(icebergTable),
                    icebergTable.properties(), maxScannedFileSize, retryMode != NO_RETRIES),
            icebergTable.location()));
}
Also used : IcebergOptimizeHandle(io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle) Table(org.apache.iceberg.Table) ClassLoaderSafeSystemTable(io.trino.plugin.base.classloader.ClassLoaderSafeSystemTable) SystemTable(io.trino.spi.connector.SystemTable) IcebergTableExecuteHandle(io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle) DataSize(io.airlift.units.DataSize)
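
The handle above carries the table's schema and partition spec as JSON strings rather than as live Iceberg objects, which is what keeps the handle serializable when it is passed around the cluster. The following minimal, self-contained sketch shows that round trip using only the Iceberg core API; the toy schema, field ids, and partitioning are invented for illustration. The toJson half mirrors getTableHandleForOptimize above, and the fromJson half mirrors what IcebergPageSinkProvider.createPageSink does in the next example.

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.types.Types;

public class SchemaSpecJsonRoundTrip {
    public static void main(String[] args) {
        // A toy schema and a daily partition spec, standing in for a real table's metadata.
        Schema schema = new Schema(
                Types.NestedField.required(1, "id", Types.LongType.get()),
                Types.NestedField.optional(2, "event_date", Types.DateType.get()));
        PartitionSpec spec = PartitionSpec.builderFor(schema).day("event_date").build();

        // Coordinator side: serialize to JSON so the values can ride inside a handle.
        String schemaJson = SchemaParser.toJson(schema);
        String specJson = PartitionSpecParser.toJson(spec);

        // Worker side: rebuild the objects from the JSON carried in the handle.
        Schema restoredSchema = SchemaParser.fromJson(schemaJson);
        PartitionSpec restoredSpec = PartitionSpecParser.fromJson(restoredSchema, specJson);

        System.out.println(restoredSchema);
        System.out.println(restoredSpec);
    }
}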

Example 2 with IcebergTableExecuteHandle

Use of io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle in the trino project by trinodb.

From the class IcebergPageSinkProvider, method createPageSink:

@Override
public ConnectorPageSink createPageSink(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableExecuteHandle tableExecuteHandle) {
    IcebergTableExecuteHandle executeHandle = (IcebergTableExecuteHandle) tableExecuteHandle;
    switch (executeHandle.getProcedureId()) {
        case OPTIMIZE:
            HdfsContext hdfsContext = new HdfsContext(session);
            IcebergOptimizeHandle optimizeHandle = (IcebergOptimizeHandle) executeHandle.getProcedureHandle();
            // Rebuild the Iceberg schema and partition spec from the JSON carried in the handle.
            Schema schema = SchemaParser.fromJson(optimizeHandle.getSchemaAsJson());
            PartitionSpec partitionSpec = PartitionSpecParser.fromJson(schema, optimizeHandle.getPartitionSpecAsJson());
            LocationProvider locationProvider = getLocationProvider(
                    executeHandle.getSchemaTableName(),
                    executeHandle.getTableLocation(),
                    optimizeHandle.getTableStorageProperties());
            return new IcebergPageSink(
                    schema, partitionSpec, locationProvider,
                    fileWriterFactory, pageIndexerFactory,
                    hdfsEnvironment, hdfsContext,
                    optimizeHandle.getTableColumns(), jsonCodec, session,
                    optimizeHandle.getFileFormat(), optimizeHandle.getTableStorageProperties(),
                    maxOpenPartitions);
    }
    throw new IllegalArgumentException("Unknown procedure: " + executeHandle.getProcedureId());
}
Also used : IcebergOptimizeHandle(io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle) IcebergTableExecuteHandle(io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle) IcebergUtil.getLocationProvider(io.trino.plugin.iceberg.IcebergUtil.getLocationProvider) LocationProvider(org.apache.iceberg.io.LocationProvider) Schema(org.apache.iceberg.Schema) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) PartitionSpec(org.apache.iceberg.PartitionSpec)
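
getLocationProvider decides where the page sink writes the rewritten files. Assuming no custom write.location-provider.impl is configured on the table, IcebergUtil.getLocationProvider should resolve to Iceberg's default LocationProviders behavior, which the minimal sketch below exercises directly; the table location and file name are made up. Together with the deserialized schema and partition spec, this provider is everything the IcebergPageSink needs to place the compacted files.

import java.util.Map;

import org.apache.iceberg.LocationProviders;
import org.apache.iceberg.io.LocationProvider;

public class LocationProviderSketch {
    public static void main(String[] args) {
        // With no table properties set, the default provider places new data files
        // under <table location>/data/.
        LocationProvider locations = LocationProviders.locationsFor("s3://bucket/warehouse/db/orders", Map.of());
        System.out.println(locations.newDataLocation("part-00000.parquet"));
    }
}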

Example 3 with IcebergTableExecuteHandle

Use of io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle in the trino project by trinodb.

From the class IcebergMetadata, method beginTableExecute:

@Override
public BeginTableExecuteResult<ConnectorTableExecuteHandle, ConnectorTableHandle> beginTableExecute(ConnectorSession session, ConnectorTableExecuteHandle tableExecuteHandle, ConnectorTableHandle updatedSourceTableHandle) {
    IcebergTableExecuteHandle executeHandle = (IcebergTableExecuteHandle) tableExecuteHandle;
    IcebergTableHandle table = (IcebergTableHandle) updatedSourceTableHandle;
    switch (executeHandle.getProcedureId()) {
        case OPTIMIZE:
            return beginOptimize(session, executeHandle, table);
    }
    throw new IllegalArgumentException("Unknown procedure '" + executeHandle.getProcedureId() + "'");
}
Also used : IcebergTableExecuteHandle(io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle)

Example 4 with IcebergTableExecuteHandle

Use of io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle in the trino project by trinodb.

From the class IcebergMetadata, method finishOptimize:

private void finishOptimize(ConnectorSession session, IcebergTableExecuteHandle executeHandle, Collection<Slice> fragments, List<Object> splitSourceInfo) {
    IcebergOptimizeHandle optimizeHandle = (IcebergOptimizeHandle) executeHandle.getProcedureHandle();
    Table icebergTable = transaction.table();

    // scanned files: paths to be deleted once the rewrite commits
    Set<DataFile> scannedFiles = splitSourceInfo.stream()
            .map(DataFile.class::cast)
            .collect(toImmutableSet());

    // each fragment describes one file written by the page sinks
    List<CommitTaskData> commitTasks = fragments.stream()
            .map(slice -> commitTaskCodec.fromJson(slice.getBytes()))
            .collect(toImmutableList());

    Type[] partitionColumnTypes = icebergTable.spec().fields().stream()
            .map(field -> field.transform().getResultType(icebergTable.schema().findType(field.sourceId())))
            .toArray(Type[]::new);

    Set<DataFile> newFiles = new HashSet<>();
    for (CommitTaskData task : commitTasks) {
        DataFiles.Builder builder = DataFiles.builder(icebergTable.spec())
                .withPath(task.getPath())
                .withFileSizeInBytes(task.getFileSizeInBytes())
                .withFormat(optimizeHandle.getFileFormat().toIceberg())
                .withMetrics(task.getMetrics().metrics());
        if (!icebergTable.spec().fields().isEmpty()) {
            String partitionDataJson = task.getPartitionDataJson()
                    .orElseThrow(() -> new VerifyException("No partition data for partitioned table"));
            builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes));
        }
        newFiles.add(builder.build());
    }

    if (scannedFiles.isEmpty() && newFiles.isEmpty()) {
        // Table scan turned out to be empty, nothing to commit
        transaction = null;
        return;
    }

    // try to leave as little garbage as possible behind
    if (optimizeHandle.isRetriesEnabled()) {
        cleanExtraOutputFiles(session, newFiles.stream()
                .map(dataFile -> dataFile.path().toString())
                .collect(toImmutableSet()));
    }

    RewriteFiles rewriteFiles = transaction.newRewrite();
    rewriteFiles.rewriteFiles(scannedFiles, newFiles);
    rewriteFiles.commit();
    transaction.commitTransaction();
    transaction = null;
}
Also used : IcebergOptimizeHandle(io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle) IcebergTableExecuteHandle(io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle) Table(org.apache.iceberg.Table) DataFile(org.apache.iceberg.DataFile) DataFiles(org.apache.iceberg.DataFiles) RewriteFiles(org.apache.iceberg.RewriteFiles) Type(org.apache.iceberg.types.Type) VerifyException(com.google.common.base.VerifyException) Slice(io.airlift.slice.Slice) ConnectorSession(io.trino.spi.connector.ConnectorSession) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Collection(java.util.Collection) List(java.util.List) Set(java.util.Set) HashSet(java.util.HashSet)
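
The interesting part of finishOptimize is the commit at the end: each rewritten file is described with DataFiles.Builder, and then the scanned files are swapped for the new ones in a single atomic RewriteFiles snapshot. Below is a minimal, self-contained sketch of that pattern against a throwaway HadoopTables table on the local filesystem; the file paths, sizes, and record counts are invented, the dataFile helper exists only to keep the sketch short, and no data files need to exist on disk because the commit only writes table metadata.

import java.nio.file.Files;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.RewriteFiles;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.types.Types;

public class RewriteCommitSketch {
    public static void main(String[] args) throws Exception {
        // Throwaway unpartitioned table on the local filesystem.
        Schema schema = new Schema(Types.NestedField.required(1, "id", Types.LongType.get()));
        String location = Files.createTempDirectory("iceberg-rewrite").toString();
        Table table = new HadoopTables(new Configuration())
                .create(schema, PartitionSpec.unpartitioned(), Map.of(), location);

        // Pretend two small files were written earlier; only their metadata matters here.
        DataFile small1 = dataFile(table, location + "/data/small-1.parquet", 1_000, 10);
        DataFile small2 = dataFile(table, location + "/data/small-2.parquet", 1_200, 12);
        table.newAppend().appendFile(small1).appendFile(small2).commit();

        // One compacted file replaces them, mirroring the tail of finishOptimize:
        // describe the new file, then swap old for new in one snapshot.
        DataFile compacted = dataFile(table, location + "/data/compacted-1.parquet", 2_200, 22);
        Transaction transaction = table.newTransaction();
        RewriteFiles rewriteFiles = transaction.newRewrite();
        rewriteFiles.rewriteFiles(Set.of(small1, small2), Set.of(compacted));
        rewriteFiles.commit();
        transaction.commitTransaction();

        table.refresh();
        System.out.println("current snapshot: " + table.currentSnapshot().snapshotId());
    }

    private static DataFile dataFile(Table table, String path, long sizeInBytes, long recordCount) {
        // Metadata-only description of a data file, like the builder loop in finishOptimize.
        return DataFiles.builder(table.spec())
                .withPath(path)
                .withFileSizeInBytes(sizeInBytes)
                .withRecordCount(recordCount)
                .withFormat(FileFormat.PARQUET)
                .build();
    }
}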

Aggregations

IcebergTableExecuteHandle (io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle)4 IcebergOptimizeHandle (io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle)3 DataSize (io.airlift.units.DataSize)2 ClassLoaderSafeSystemTable (io.trino.plugin.base.classloader.ClassLoaderSafeSystemTable)2 HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext)2 PartitionSpec (org.apache.iceberg.PartitionSpec)2 Schema (org.apache.iceberg.Schema)2 Splitter (com.google.common.base.Splitter)1 Suppliers (com.google.common.base.Suppliers)1 Verify.verify (com.google.common.base.Verify.verify)1 VerifyException (com.google.common.base.VerifyException)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)1 Iterables (com.google.common.collect.Iterables)1 JsonCodec (io.airlift.json.JsonCodec)1 Logger (io.airlift.log.Logger)1