Search in sources :

Example 1 with HiveApplyProjectionUtil

use of io.trino.plugin.hive.HiveApplyProjectionUtil in project trino by trinodb.

the class IcebergMetadata method applyProjection.

@Override
public Optional<ProjectionApplicationResult<ConnectorTableHandle>> applyProjection(ConnectorSession session, ConnectorTableHandle handle, List<ConnectorExpression> projections, Map<String, ColumnHandle> assignments) {
    if (!isProjectionPushdownEnabled(session)) {
        return Optional.empty();
    }
    // Create projected column representations for supported sub expressions. Simple column references and chain of
    // dereferences on a variable are supported right now.
    Set<ConnectorExpression> projectedExpressions = projections.stream().flatMap(expression -> extractSupportedProjectedColumns(expression).stream()).collect(toImmutableSet());
    Map<ConnectorExpression, ProjectedColumnRepresentation> columnProjections = projectedExpressions.stream().collect(toImmutableMap(Function.identity(), HiveApplyProjectionUtil::createProjectedColumnRepresentation));
    IcebergTableHandle icebergTableHandle = (IcebergTableHandle) handle;
    // all references are simple variables
    if (columnProjections.values().stream().allMatch(ProjectedColumnRepresentation::isVariable)) {
        Set<IcebergColumnHandle> projectedColumns = assignments.values().stream().map(IcebergColumnHandle.class::cast).collect(toImmutableSet());
        if (icebergTableHandle.getProjectedColumns().equals(projectedColumns)) {
            return Optional.empty();
        }
        List<Assignment> assignmentsList = assignments.entrySet().stream().map(assignment -> new Assignment(assignment.getKey(), assignment.getValue(), ((IcebergColumnHandle) assignment.getValue()).getType())).collect(toImmutableList());
        return Optional.of(new ProjectionApplicationResult<>(icebergTableHandle.withProjectedColumns(projectedColumns), projections, assignmentsList, false));
    }
    Map<String, Assignment> newAssignments = new HashMap<>();
    ImmutableMap.Builder<ConnectorExpression, Variable> newVariablesBuilder = ImmutableMap.builder();
    ImmutableSet.Builder<IcebergColumnHandle> projectedColumnsBuilder = ImmutableSet.builder();
    for (Map.Entry<ConnectorExpression, ProjectedColumnRepresentation> entry : columnProjections.entrySet()) {
        ConnectorExpression expression = entry.getKey();
        ProjectedColumnRepresentation projectedColumn = entry.getValue();
        IcebergColumnHandle baseColumnHandle = (IcebergColumnHandle) assignments.get(projectedColumn.getVariable().getName());
        IcebergColumnHandle projectedColumnHandle = createProjectedColumnHandle(baseColumnHandle, projectedColumn.getDereferenceIndices(), expression.getType());
        String projectedColumnName = projectedColumnHandle.getQualifiedName();
        Variable projectedColumnVariable = new Variable(projectedColumnName, expression.getType());
        Assignment newAssignment = new Assignment(projectedColumnName, projectedColumnHandle, expression.getType());
        newAssignments.putIfAbsent(projectedColumnName, newAssignment);
        newVariablesBuilder.put(expression, projectedColumnVariable);
        projectedColumnsBuilder.add(projectedColumnHandle);
    }
    // Modify projections to refer to new variables
    Map<ConnectorExpression, Variable> newVariables = newVariablesBuilder.buildOrThrow();
    List<ConnectorExpression> newProjections = projections.stream().map(expression -> replaceWithNewVariables(expression, newVariables)).collect(toImmutableList());
    List<Assignment> outputAssignments = newAssignments.values().stream().collect(toImmutableList());
    return Optional.of(new ProjectionApplicationResult<>(icebergTableHandle.withProjectedColumns(projectedColumnsBuilder.build()), newProjections, outputAssignments, false));
}
Also used : IcebergUtil.getPartitionKeys(io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys) TrinoCatalog(io.trino.plugin.iceberg.catalog.TrinoCatalog) FileSystem(org.apache.hadoop.fs.FileSystem) ConnectorTableExecuteHandle(io.trino.spi.connector.ConnectorTableExecuteHandle) HiveApplyProjectionUtil.replaceWithNewVariables(io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables) Collections.singletonList(java.util.Collections.singletonList) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Matcher(java.util.regex.Matcher) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) RewriteFiles(org.apache.iceberg.RewriteFiles) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) CloseableIterable(org.apache.iceberg.io.CloseableIterable) IcebergUtil.newCreateTableTransaction(io.trino.plugin.iceberg.IcebergUtil.newCreateTableTransaction) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) IcebergTableExecuteHandle(io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle) Set(java.util.Set) Schema(org.apache.iceberg.Schema) ColumnIdentity.primitiveColumnIdentity(io.trino.plugin.iceberg.ColumnIdentity.primitiveColumnIdentity) SchemaTableName(io.trino.spi.connector.SchemaTableName) PartitionSpecParser(org.apache.iceberg.PartitionSpecParser) Collectors.joining(java.util.stream.Collectors.joining) Type(org.apache.iceberg.types.Type) UncheckedIOException(java.io.UncheckedIOException) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) Iterables(com.google.common.collect.Iterables) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) IcebergUtil.deserializePartitionValue(io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue) Slice(io.airlift.slice.Slice) NullableValue(io.trino.spi.predicate.NullableValue) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Variable(io.trino.spi.expression.Variable) Supplier(java.util.function.Supplier) OptionalLong(java.util.OptionalLong) MaterializedViewFreshness(io.trino.spi.connector.MaterializedViewFreshness) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) FILE_FORMAT_PROPERTY(io.trino.plugin.iceberg.IcebergTableProperties.FILE_FORMAT_PROPERTY) OPTIMIZE(io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.OPTIMIZE) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) DEPENDS_ON_TABLES(io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog.DEPENDS_ON_TABLES) Table(org.apache.iceberg.Table) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) ICEBERG_INVALID_METADATA(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) DiscretePredicates(io.trino.spi.connector.DiscretePredicates) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) PartitionFields.parsePartitionFields(io.trino.plugin.iceberg.PartitionFields.parsePartitionFields) ArrayDeque(java.util.ArrayDeque) MaterializedViewNotFoundException(io.trino.spi.connector.MaterializedViewNotFoundException) TypeConverter.toTrinoType(io.trino.plugin.iceberg.TypeConverter.toTrinoType) IcebergUtil.getColumns(io.trino.plugin.iceberg.IcebergUtil.getColumns) IcebergSessionProperties.isProjectionPushdownEnabled(io.trino.plugin.iceberg.IcebergSessionProperties.isProjectionPushdownEnabled) IcebergTableProcedureId(io.trino.plugin.iceberg.procedure.IcebergTableProcedureId) IcebergSessionProperties.isStatisticsEnabled(io.trino.plugin.iceberg.IcebergSessionProperties.isStatisticsEnabled) AppendFiles(org.apache.iceberg.AppendFiles) NO_RETRIES(io.trino.spi.connector.RetryMode.NO_RETRIES) ConnectorMaterializedViewDefinition(io.trino.spi.connector.ConnectorMaterializedViewDefinition) TypeConverter.toIcebergType(io.trino.plugin.iceberg.TypeConverter.toIcebergType) PartitionField(org.apache.iceberg.PartitionField) DataFiles(org.apache.iceberg.DataFiles) CatalogSchemaName(io.trino.spi.connector.CatalogSchemaName) LOCATION_PROPERTY(io.trino.plugin.iceberg.IcebergTableProperties.LOCATION_PROPERTY) Path(org.apache.hadoop.fs.Path) DATA(io.trino.plugin.iceberg.TableType.DATA) ConnectorViewDefinition(io.trino.spi.connector.ConnectorViewDefinition) FileScanTask(org.apache.iceberg.FileScanTask) DataFile(org.apache.iceberg.DataFile) Splitter(com.google.common.base.Splitter) IcebergTableProperties.getPartitioning(io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning) IcebergUtil.getFileFormat(io.trino.plugin.iceberg.IcebergUtil.getFileFormat) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) HiveWrittenPartitions(io.trino.plugin.hive.HiveWrittenPartitions) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) TrinoException(io.trino.spi.TrinoException) TableScan(org.apache.iceberg.TableScan) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) String.format(java.lang.String.format) SchemaParser(org.apache.iceberg.SchemaParser) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) ClassLoaderSafeSystemTable(io.trino.plugin.base.classloader.ClassLoaderSafeSystemTable) IcebergUtil.getTableComment(io.trino.plugin.iceberg.IcebergUtil.getTableComment) Assignment(io.trino.spi.connector.Assignment) HiveApplyProjectionUtil(io.trino.plugin.hive.HiveApplyProjectionUtil) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult) PartitionSpec(org.apache.iceberg.PartitionSpec) Function.identity(java.util.function.Function.identity) TableProperties(org.apache.iceberg.TableProperties) Optional(java.util.Optional) ProjectedColumnRepresentation(io.trino.plugin.hive.HiveApplyProjectionUtil.ProjectedColumnRepresentation) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) Pattern(java.util.regex.Pattern) SystemTable(io.trino.spi.connector.SystemTable) JsonCodec(io.airlift.json.JsonCodec) Constraint(io.trino.spi.connector.Constraint) IcebergOptimizeHandle(io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle) Logger(io.airlift.log.Logger) PartitionFields.toPartitionFields(io.trino.plugin.iceberg.PartitionFields.toPartitionFields) HashMap(java.util.HashMap) Deque(java.util.Deque) Function(java.util.function.Function) ExpressionConverter.toIcebergExpression(io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression) PARTITIONING_PROPERTY(io.trino.plugin.iceberg.IcebergTableProperties.PARTITIONING_PROPERTY) HashSet(java.util.HashSet) BiPredicate(java.util.function.BiPredicate) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveApplyProjectionUtil.extractSupportedProjectedColumns(io.trino.plugin.hive.HiveApplyProjectionUtil.extractSupportedProjectedColumns) VerifyException(com.google.common.base.VerifyException) RetryMode(io.trino.spi.connector.RetryMode) Iterator(java.util.Iterator) TupleDomain(io.trino.spi.predicate.TupleDomain) IcebergUtil.toIcebergSchema(io.trino.plugin.iceberg.IcebergUtil.toIcebergSchema) Transaction(org.apache.iceberg.Transaction) Comparator(java.util.Comparator) TypeManager(io.trino.spi.type.TypeManager) HiveUtil.isStructuralType(io.trino.plugin.hive.util.HiveUtil.isStructuralType) Snapshot(org.apache.iceberg.Snapshot) Variable(io.trino.spi.expression.Variable) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) Assignment(io.trino.spi.connector.Assignment) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ProjectedColumnRepresentation(io.trino.plugin.hive.HiveApplyProjectionUtil.ProjectedColumnRepresentation) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)

Aggregations

Splitter (com.google.common.base.Splitter)1 Suppliers (com.google.common.base.Suppliers)1 Verify.verify (com.google.common.base.Verify.verify)1 VerifyException (com.google.common.base.VerifyException)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)1 Iterables (com.google.common.collect.Iterables)1 JsonCodec (io.airlift.json.JsonCodec)1 Logger (io.airlift.log.Logger)1 Slice (io.airlift.slice.Slice)1 DataSize (io.airlift.units.DataSize)1 ClassLoaderSafeSystemTable (io.trino.plugin.base.classloader.ClassLoaderSafeSystemTable)1 HdfsEnvironment (io.trino.plugin.hive.HdfsEnvironment)1 HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext)1 HiveApplyProjectionUtil (io.trino.plugin.hive.HiveApplyProjectionUtil)1 ProjectedColumnRepresentation (io.trino.plugin.hive.HiveApplyProjectionUtil.ProjectedColumnRepresentation)1