Example 71 with Domain

Use of io.trino.spi.predicate.Domain in project trino by trinodb.

The class IcebergPageSourceProvider, method createOrcPageSource:

private static ReaderPageSource createOrcPageSource(
        HdfsEnvironment hdfsEnvironment,
        ConnectorIdentity identity,
        Configuration configuration,
        Path path,
        long start,
        long length,
        long fileSize,
        List<IcebergColumnHandle> columns,
        TupleDomain<IcebergColumnHandle> effectivePredicate,
        OrcReaderOptions options,
        FileFormatDataSourceStats stats,
        TypeManager typeManager,
        Optional<NameMapping> nameMapping) {
    OrcDataSource orcDataSource = null;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, path, configuration);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(path));
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSize, options, inputStream, stats);
        OrcReader reader = OrcReader.createOrcReader(orcDataSource, options).orElseThrow(() -> new TrinoException(ICEBERG_BAD_DATA, "ORC file is zero length"));
        List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
        if (nameMapping.isPresent() && !hasIds(reader.getRootColumn())) {
            fileColumns = fileColumns.stream()
                    .map(orcColumn -> setMissingFieldIds(orcColumn, nameMapping.get(), ImmutableList.of(orcColumn.getColumnName())))
                    .collect(toImmutableList());
        }
        Map<Integer, OrcColumn> fileColumnsByIcebergId = mapIdsToOrcFileColumns(fileColumns);
        TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(options.isBloomFiltersEnabled());
        Map<IcebergColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
        Optional<ReaderColumns> columnProjections = projectColumns(columns);
        Map<Integer, List<List<Integer>>> projectionsByFieldId = columns.stream()
                .collect(groupingBy(
                        column -> column.getBaseColumnIdentity().getId(),
                        mapping(IcebergColumnHandle::getPath, toUnmodifiableList())));
        List<IcebergColumnHandle> readColumns = columnProjections
                .map(readerColumns -> (List<IcebergColumnHandle>) readerColumns.get().stream()
                        .map(IcebergColumnHandle.class::cast)
                        .collect(toImmutableList()))
                .orElse(columns);
        List<OrcColumn> fileReadColumns = new ArrayList<>(readColumns.size());
        List<Type> fileReadTypes = new ArrayList<>(readColumns.size());
        List<ProjectedLayout> projectedLayouts = new ArrayList<>(readColumns.size());
        List<ColumnAdaptation> columnAdaptations = new ArrayList<>(readColumns.size());
        for (IcebergColumnHandle column : readColumns) {
            verify(column.isBaseColumn(), "Column projections must be based off a root column");
            OrcColumn orcColumn = fileColumnsByIcebergId.get(column.getId());
            if (orcColumn != null) {
                Type readType = getOrcReadType(column.getType(), typeManager);
                if (column.getType() == UUID && !"UUID".equals(orcColumn.getAttributes().get(ICEBERG_BINARY_TYPE))) {
                    throw new TrinoException(ICEBERG_BAD_DATA, format("Expected ORC column for UUID data to be annotated with %s=UUID: %s", ICEBERG_BINARY_TYPE, orcColumn));
                }
                List<List<Integer>> fieldIdProjections = projectionsByFieldId.get(column.getId());
                ProjectedLayout projectedLayout = IcebergOrcProjectedLayout.createProjectedLayout(orcColumn, fieldIdProjections);
                int sourceIndex = fileReadColumns.size();
                columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
                fileReadColumns.add(orcColumn);
                fileReadTypes.add(readType);
                projectedLayouts.add(projectedLayout);
                for (Map.Entry<IcebergColumnHandle, Domain> domainEntry : effectivePredicateDomains.entrySet()) {
                    IcebergColumnHandle predicateColumn = domainEntry.getKey();
                    OrcColumn predicateOrcColumn = fileColumnsByIcebergId.get(predicateColumn.getId());
                    if (predicateOrcColumn != null && column.getColumnIdentity().equals(predicateColumn.getBaseColumnIdentity())) {
                        predicateBuilder.addColumn(predicateOrcColumn.getColumnId(), domainEntry.getValue());
                    }
                }
            } else {
                columnAdaptations.add(ColumnAdaptation.nullColumn(column.getType()));
            }
        }
        AggregatedMemoryContext memoryUsage = newSimpleAggregatedMemoryContext();
        OrcDataSourceId orcDataSourceId = orcDataSource.getId();
        OrcRecordReader recordReader = reader.createRecordReader(
                fileReadColumns,
                fileReadTypes,
                projectedLayouts,
                predicateBuilder.build(),
                start,
                length,
                UTC,
                memoryUsage,
                INITIAL_BATCH_SIZE,
                exception -> handleException(orcDataSourceId, exception),
                new IdBasedFieldMapperFactory(readColumns));
        return new ReaderPageSource(new OrcPageSource(recordReader, columnAdaptations, orcDataSource, Optional.empty(), Optional.empty(), memoryUsage, stats), columnProjections);
    } catch (Exception e) {
        if (orcDataSource != null) {
            try {
                orcDataSource.close();
            } catch (IOException ignored) {
            }
        }
        if (e instanceof TrinoException) {
            throw (TrinoException) e;
        }
        String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e instanceof BlockMissingException) {
            throw new TrinoException(ICEBERG_MISSING_DATA, message, e);
        }
        throw new TrinoException(ICEBERG_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) ORC_ICEBERG_ID_KEY(io.trino.plugin.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY) ColumnAdaptation(io.trino.plugin.hive.orc.OrcPageSource.ColumnAdaptation) FileStatus(org.apache.hadoop.fs.FileStatus) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ParquetDataSourceId(io.trino.parquet.ParquetDataSourceId) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ICEBERG_CANNOT_OPEN_SPLIT(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT) UUID(io.trino.spi.type.UuidType.UUID) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) ICEBERG_FILESYSTEM_ERROR(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ReaderColumns(io.trino.plugin.hive.ReaderColumns) Set(java.util.Set) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) OrcReaderConfig(io.trino.plugin.hive.orc.OrcReaderConfig) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ColumnIO(org.apache.parquet.io.ColumnIO) IcebergSessionProperties.getOrcTinyStripeThreshold(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) ParquetCorruptionException(io.trino.parquet.ParquetCorruptionException) MappedField(org.apache.iceberg.mapping.MappedField) Collectors.groupingBy(java.util.stream.Collectors.groupingBy) IcebergSessionProperties.isOrcNestedLazy(io.trino.plugin.iceberg.IcebergSessionProperties.isOrcNestedLazy) IcebergSessionProperties.getOrcMaxBufferSize(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxBufferSize) HdfsParquetDataSource(io.trino.plugin.hive.parquet.HdfsParquetDataSource) ArrayList(java.util.ArrayList) Collectors.toUnmodifiableList(java.util.stream.Collectors.toUnmodifiableList) TupleDomainOrcPredicateBuilder(io.trino.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder) OrcPageSource(io.trino.plugin.hive.orc.OrcPageSource) IcebergSessionProperties.getOrcMaxMergeDistance(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance) ICEBERG_MISSING_DATA(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA) AggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) MappedFields(org.apache.iceberg.mapping.MappedFields) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) OrcType(io.trino.orc.metadata.OrcType) Predicate(io.trino.parquet.predicate.Predicate) IcebergSessionProperties.isUseFileSizeFromMetadata(io.trino.plugin.iceberg.IcebergSessionProperties.isUseFileSizeFromMetadata) MapType(io.trino.spi.type.MapType) 
PredicateUtils.predicateMatches(io.trino.parquet.predicate.PredicateUtils.predicateMatches) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) StandardTypes(io.trino.spi.type.StandardTypes) NameMappingParser(org.apache.iceberg.mapping.NameMappingParser) IcebergSessionProperties.getOrcLazyReadSmallRanges(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges) IcebergSessionProperties.getParquetMaxReadBlockSize(io.trino.plugin.iceberg.IcebergSessionProperties.getParquetMaxReadBlockSize) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) UTC(org.joda.time.DateTimeZone.UTC) Field(io.trino.parquet.Field) Traverser(com.google.common.graph.Traverser) ParquetPageSource(io.trino.plugin.hive.parquet.ParquetPageSource) ProjectedLayout(io.trino.orc.OrcReader.ProjectedLayout) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) IcebergSessionProperties.getOrcStreamBufferSize(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcStreamBufferSize) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) ParquetSchemaUtil(org.apache.iceberg.parquet.ParquetSchemaUtil) OrcColumn(io.trino.orc.OrcColumn) PredicateUtils.buildPredicate(io.trino.parquet.predicate.PredicateUtils.buildPredicate) MetadataReader(io.trino.parquet.reader.MetadataReader) ICEBERG_DOMAIN_COMPACTION_THRESHOLD(io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD) OrcRecordReader(io.trino.orc.OrcRecordReader) NameMapping(org.apache.iceberg.mapping.NameMapping) Path(org.apache.hadoop.fs.Path) OrcDataSource(io.trino.orc.OrcDataSource) ReaderProjectionsAdapter(io.trino.plugin.hive.ReaderProjectionsAdapter) RowType(io.trino.spi.type.RowType) ICEBERG_BAD_DATA(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA) ImmutableMap(com.google.common.collect.ImmutableMap) INITIAL_BATCH_SIZE(io.trino.orc.OrcReader.INITIAL_BATCH_SIZE) ParquetReader(io.trino.parquet.reader.ParquetReader) FieldContext(io.trino.plugin.iceberg.IcebergParquetColumnIOConverter.FieldContext) ICEBERG_CURSOR_ERROR(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CURSOR_ERROR) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) ParquetTypeUtils.getColumnIO(io.trino.parquet.ParquetTypeUtils.getColumnIO) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Objects(java.util.Objects) OrcDataSourceId(io.trino.orc.OrcDataSourceId) MessageType(org.apache.parquet.schema.MessageType) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) IcebergSessionProperties.getOrcMaxReadBlockSize(io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) Type(io.trino.spi.type.Type) HashMap(java.util.HashMap) TupleDomainOrcPredicate(io.trino.orc.TupleDomainOrcPredicate) Function(java.util.function.Function) Inject(javax.inject.Inject) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) OrcReaderOptions(io.trino.orc.OrcReaderOptions) Objects.requireNonNull(java.util.Objects.requireNonNull) Collectors.mapping(java.util.stream.Collectors.mapping) 
IcebergSessionProperties.isOrcBloomFiltersEnabled(io.trino.plugin.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled) HdfsOrcDataSource(io.trino.plugin.hive.orc.HdfsOrcDataSource) ParquetReaderOptions(io.trino.parquet.ParquetReaderOptions) OrcReader(io.trino.orc.OrcReader) ConnectorPageSourceProvider(io.trino.spi.connector.ConnectorPageSourceProvider) ICEBERG_BINARY_TYPE(io.trino.plugin.iceberg.TypeConverter.ICEBERG_BINARY_TYPE) TupleDomain(io.trino.spi.predicate.TupleDomain) OrcReader.fullyProjectedLayout(io.trino.orc.OrcReader.fullyProjectedLayout) OrcCorruptionException(io.trino.orc.OrcCorruptionException) Collectors.toList(java.util.stream.Collectors.toList) ParquetTypeUtils.getDescriptors(io.trino.parquet.ParquetTypeUtils.getDescriptors) ParquetDataSource(io.trino.parquet.ParquetDataSource) TypeManager(io.trino.spi.type.TypeManager) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle)
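
The Domain plumbing in this method boils down to one pattern: unwrap the TupleDomain into per-column Domain objects, then hand each Domain to the ORC predicate builder. Below is a minimal, self-contained sketch of just that unwrapping step against the Trino SPI; the String column keys and the BIGINT literal are illustrative stand-ins for the IcebergColumnHandle keys the real method uses.

import static io.trino.spi.type.BigintType.BIGINT;

import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.Range;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.predicate.ValueSet;
import java.util.Map;

public class EffectivePredicateSketch {
    public static void main(String[] args) {
        // Domain for "id > 100"; the second argument disallows NULL
        Domain idDomain = Domain.create(ValueSet.ofRanges(Range.greaterThan(BIGINT, 100L)), false);
        TupleDomain<String> effectivePredicate = TupleDomain.withColumnDomains(Map.of("id", idDomain));

        // getDomains() is Optional.empty() only for TupleDomain.none(); createOrcPageSource
        // can orElseThrow because planning already discards unsatisfiable splits
        Map<String, Domain> domains = effectivePredicate.getDomains()
                .orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
        domains.forEach((column, domain) -> System.out.println(column + " -> " + domain));
    }
}

Each extracted Domain is then registered with the builder from TupleDomainOrcPredicate.builder() via addColumn, as the loop in the method above shows.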

Example 72 with Domain

Use of io.trino.spi.predicate.Domain in project trino by trinodb.

The class IcebergMetadata, method applyFilter:

@Override
public Optional<ConstraintApplicationResult<ConnectorTableHandle>> applyFilter(ConnectorSession session, ConnectorTableHandle handle, Constraint constraint) {
    IcebergTableHandle table = (IcebergTableHandle) handle;
    Table icebergTable = catalog.loadTable(session, table.getSchemaTableName());
    Set<Integer> partitionSourceIds = identityPartitionColumnsInAllSpecs(icebergTable);
    BiPredicate<IcebergColumnHandle, Domain> isIdentityPartition = (column, domain) -> partitionSourceIds.contains(column.getId());
    TupleDomain<IcebergColumnHandle> newEnforcedConstraint = constraint.getSummary()
            .transformKeys(IcebergColumnHandle.class::cast)
            .filter(isIdentityPartition)
            .intersect(table.getEnforcedPredicate());
    TupleDomain<IcebergColumnHandle> remainingConstraint = constraint.getSummary()
            .transformKeys(IcebergColumnHandle.class::cast)
            .filter(isIdentityPartition.negate());
    TupleDomain<IcebergColumnHandle> newUnenforcedConstraint = remainingConstraint
            .filter((columnHandle, predicate) -> !isStructuralType(columnHandle.getType()))
            .intersect(table.getUnenforcedPredicate());
    if (newEnforcedConstraint.equals(table.getEnforcedPredicate()) && newUnenforcedConstraint.equals(table.getUnenforcedPredicate())) {
        return Optional.empty();
    }
    return Optional.of(new ConstraintApplicationResult<>(
            new IcebergTableHandle(
                    table.getSchemaName(),
                    table.getTableName(),
                    table.getTableType(),
                    table.getSnapshotId(),
                    newUnenforcedConstraint,
                    newEnforcedConstraint,
                    table.getProjectedColumns(),
                    table.getNameMappingJson()),
            remainingConstraint.transformKeys(ColumnHandle.class::cast),
            false));
}
Also used : IcebergUtil.getPartitionKeys(io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys) TrinoCatalog(io.trino.plugin.iceberg.catalog.TrinoCatalog) FileSystem(org.apache.hadoop.fs.FileSystem) ConnectorTableExecuteHandle(io.trino.spi.connector.ConnectorTableExecuteHandle) HiveApplyProjectionUtil.replaceWithNewVariables(io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables) Collections.singletonList(java.util.Collections.singletonList) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Matcher(java.util.regex.Matcher) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) RewriteFiles(org.apache.iceberg.RewriteFiles) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) CloseableIterable(org.apache.iceberg.io.CloseableIterable) IcebergUtil.newCreateTableTransaction(io.trino.plugin.iceberg.IcebergUtil.newCreateTableTransaction) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) IcebergTableExecuteHandle(io.trino.plugin.iceberg.procedure.IcebergTableExecuteHandle) Set(java.util.Set) Schema(org.apache.iceberg.Schema) ColumnIdentity.primitiveColumnIdentity(io.trino.plugin.iceberg.ColumnIdentity.primitiveColumnIdentity) SchemaTableName(io.trino.spi.connector.SchemaTableName) PartitionSpecParser(org.apache.iceberg.PartitionSpecParser) Collectors.joining(java.util.stream.Collectors.joining) Type(org.apache.iceberg.types.Type) UncheckedIOException(java.io.UncheckedIOException) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) Iterables(com.google.common.collect.Iterables) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) IcebergUtil.deserializePartitionValue(io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue) Slice(io.airlift.slice.Slice) NullableValue(io.trino.spi.predicate.NullableValue) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Variable(io.trino.spi.expression.Variable) Supplier(java.util.function.Supplier) OptionalLong(java.util.OptionalLong) MaterializedViewFreshness(io.trino.spi.connector.MaterializedViewFreshness) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) FILE_FORMAT_PROPERTY(io.trino.plugin.iceberg.IcebergTableProperties.FILE_FORMAT_PROPERTY) OPTIMIZE(io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.OPTIMIZE) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) DEPENDS_ON_TABLES(io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog.DEPENDS_ON_TABLES) Table(org.apache.iceberg.Table) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) ICEBERG_INVALID_METADATA(io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) 
DiscretePredicates(io.trino.spi.connector.DiscretePredicates) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) PartitionFields.parsePartitionFields(io.trino.plugin.iceberg.PartitionFields.parsePartitionFields) ArrayDeque(java.util.ArrayDeque) MaterializedViewNotFoundException(io.trino.spi.connector.MaterializedViewNotFoundException) TypeConverter.toTrinoType(io.trino.plugin.iceberg.TypeConverter.toTrinoType) IcebergUtil.getColumns(io.trino.plugin.iceberg.IcebergUtil.getColumns) IcebergSessionProperties.isProjectionPushdownEnabled(io.trino.plugin.iceberg.IcebergSessionProperties.isProjectionPushdownEnabled) IcebergTableProcedureId(io.trino.plugin.iceberg.procedure.IcebergTableProcedureId) IcebergSessionProperties.isStatisticsEnabled(io.trino.plugin.iceberg.IcebergSessionProperties.isStatisticsEnabled) AppendFiles(org.apache.iceberg.AppendFiles) NO_RETRIES(io.trino.spi.connector.RetryMode.NO_RETRIES) ConnectorMaterializedViewDefinition(io.trino.spi.connector.ConnectorMaterializedViewDefinition) TypeConverter.toIcebergType(io.trino.plugin.iceberg.TypeConverter.toIcebergType) PartitionField(org.apache.iceberg.PartitionField) DataFiles(org.apache.iceberg.DataFiles) CatalogSchemaName(io.trino.spi.connector.CatalogSchemaName) LOCATION_PROPERTY(io.trino.plugin.iceberg.IcebergTableProperties.LOCATION_PROPERTY) Path(org.apache.hadoop.fs.Path) DATA(io.trino.plugin.iceberg.TableType.DATA) ConnectorViewDefinition(io.trino.spi.connector.ConnectorViewDefinition) FileScanTask(org.apache.iceberg.FileScanTask) DataFile(org.apache.iceberg.DataFile) Splitter(com.google.common.base.Splitter) IcebergTableProperties.getPartitioning(io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning) IcebergUtil.getFileFormat(io.trino.plugin.iceberg.IcebergUtil.getFileFormat) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) HiveWrittenPartitions(io.trino.plugin.hive.HiveWrittenPartitions) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) TrinoException(io.trino.spi.TrinoException) TableScan(org.apache.iceberg.TableScan) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) String.format(java.lang.String.format) SchemaParser(org.apache.iceberg.SchemaParser) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) ClassLoaderSafeSystemTable(io.trino.plugin.base.classloader.ClassLoaderSafeSystemTable) IcebergUtil.getTableComment(io.trino.plugin.iceberg.IcebergUtil.getTableComment) Assignment(io.trino.spi.connector.Assignment) HiveApplyProjectionUtil(io.trino.plugin.hive.HiveApplyProjectionUtil) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult) PartitionSpec(org.apache.iceberg.PartitionSpec) Function.identity(java.util.function.Function.identity) TableProperties(org.apache.iceberg.TableProperties) Optional(java.util.Optional) ProjectedColumnRepresentation(io.trino.plugin.hive.HiveApplyProjectionUtil.ProjectedColumnRepresentation) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) Pattern(java.util.regex.Pattern) SystemTable(io.trino.spi.connector.SystemTable) JsonCodec(io.airlift.json.JsonCodec) Constraint(io.trino.spi.connector.Constraint) IcebergOptimizeHandle(io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle) 
Logger(io.airlift.log.Logger) PartitionFields.toPartitionFields(io.trino.plugin.iceberg.PartitionFields.toPartitionFields) HashMap(java.util.HashMap) Deque(java.util.Deque) Function(java.util.function.Function) ExpressionConverter.toIcebergExpression(io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression) PARTITIONING_PROPERTY(io.trino.plugin.iceberg.IcebergTableProperties.PARTITIONING_PROPERTY) HashSet(java.util.HashSet) BiPredicate(java.util.function.BiPredicate) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveApplyProjectionUtil.extractSupportedProjectedColumns(io.trino.plugin.hive.HiveApplyProjectionUtil.extractSupportedProjectedColumns) VerifyException(com.google.common.base.VerifyException) RetryMode(io.trino.spi.connector.RetryMode) Iterator(java.util.Iterator) TupleDomain(io.trino.spi.predicate.TupleDomain) IcebergUtil.toIcebergSchema(io.trino.plugin.iceberg.IcebergUtil.toIcebergSchema) Transaction(org.apache.iceberg.Transaction) Comparator(java.util.Comparator) TypeManager(io.trino.spi.type.TypeManager) HiveUtil.isStructuralType(io.trino.plugin.hive.util.HiveUtil.isStructuralType) Snapshot(org.apache.iceberg.Snapshot)
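
The essential Domain operation here is TupleDomain.filter, which splits the constraint summary into the part the connector can enforce through identity-partition pruning and the remainder Trino must still evaluate. A minimal sketch of that split, with plain String keys standing in for IcebergColumnHandle:

import static io.trino.spi.type.BigintType.BIGINT;

import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;
import java.util.Map;
import java.util.Set;
import java.util.function.BiPredicate;

public class ConstraintSplitSketch {
    public static void main(String[] args) {
        Set<String> partitionColumns = Set.of("ds");
        BiPredicate<String, Domain> isPartitionColumn = (column, domain) -> partitionColumns.contains(column);

        TupleDomain<String> summary = TupleDomain.withColumnDomains(Map.of(
                "ds", Domain.singleValue(BIGINT, 20220101L),
                "id", Domain.singleValue(BIGINT, 42L)));

        // filter() keeps only the entries matching the predicate, so these two calls
        // partition the summary into an enforced and a remaining constraint
        TupleDomain<String> enforced = summary.filter(isPartitionColumn);
        TupleDomain<String> remaining = summary.filter(isPartitionColumn.negate());

        System.out.println("enforced  = " + enforced);
        System.out.println("remaining = " + remaining);
    }
}

Note how applyFilter returns Optional.empty() when the newly computed constraints equal the ones already on the table handle; that keeps the optimizer from looping on a no-op pushdown.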

Example 73 with Domain

Use of io.trino.spi.predicate.Domain in project trino by trinodb.

The class ColumnJdbcTable, method cursor:

@Override
public RecordCursor cursor(ConnectorTransactionHandle transactionHandle, ConnectorSession connectorSession, TupleDomain<Integer> constraint) {
    Builder table = InMemoryRecordSet.builder(METADATA);
    if (constraint.isNone()) {
        return table.build().cursor();
    }
    Session session = ((FullConnectorSession) connectorSession).getSession();
    boolean omitDateTimeTypePrecision = isOmitDateTimeTypePrecision(session);
    Optional<String> catalogFilter = tryGetSingleVarcharValue(constraint, 0);
    Optional<String> schemaFilter = tryGetSingleVarcharValue(constraint, 1);
    Optional<String> tableFilter = tryGetSingleVarcharValue(constraint, 2);
    Domain catalogDomain = constraint.getDomains().get().getOrDefault(0, Domain.all(createUnboundedVarcharType()));
    Domain schemaDomain = constraint.getDomains().get().getOrDefault(1, Domain.all(createUnboundedVarcharType()));
    Domain tableDomain = constraint.getDomains().get().getOrDefault(2, Domain.all(createUnboundedVarcharType()));
    if (isNonLowercase(schemaFilter) || isNonLowercase(tableFilter)) {
        // Non-lowercase predicate will never match a lowercase name (until TODO https://github.com/trinodb/trino/issues/17)
        return table.build().cursor();
    }
    for (String catalog : listCatalogs(session, metadata, accessControl, catalogFilter).keySet()) {
        if (!catalogDomain.includesNullableValue(utf8Slice(catalog))) {
            continue;
        }
        if ((schemaDomain.isAll() && tableDomain.isAll()) || schemaFilter.isPresent()) {
            QualifiedTablePrefix tablePrefix = tablePrefix(catalog, schemaFilter, tableFilter);
            Map<SchemaTableName, List<ColumnMetadata>> tableColumns = listTableColumns(session, metadata, accessControl, tablePrefix);
            addColumnsRow(table, catalog, tableColumns, omitDateTimeTypePrecision);
        } else {
            Collection<String> schemas = listSchemas(session, metadata, accessControl, catalog, schemaFilter);
            for (String schema : schemas) {
                if (!schemaDomain.includesNullableValue(utf8Slice(schema))) {
                    continue;
                }
                QualifiedTablePrefix tablePrefix = tableFilter.isPresent() ? new QualifiedTablePrefix(catalog, schema, tableFilter.get()) : new QualifiedTablePrefix(catalog, schema);
                Set<SchemaTableName> tables = listTables(session, metadata, accessControl, tablePrefix);
                for (SchemaTableName schemaTableName : tables) {
                    String tableName = schemaTableName.getTableName();
                    if (!tableDomain.includesNullableValue(utf8Slice(tableName))) {
                        continue;
                    }
                    Map<SchemaTableName, List<ColumnMetadata>> tableColumns = listTableColumns(session, metadata, accessControl, new QualifiedTablePrefix(catalog, schema, tableName));
                    addColumnsRow(table, catalog, tableColumns, omitDateTimeTypePrecision);
                }
            }
        }
    }
    return table.build().cursor();
}
Also used : TableMetadataBuilder.tableMetadataBuilder(io.trino.metadata.MetadataUtil.TableMetadataBuilder.tableMetadataBuilder) Builder(io.trino.spi.connector.InMemoryRecordSet.Builder) SchemaTableName(io.trino.spi.connector.SchemaTableName) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) QualifiedTablePrefix(io.trino.metadata.QualifiedTablePrefix) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) Session(io.trino.Session) FullConnectorSession(io.trino.FullConnectorSession) ConnectorSession(io.trino.spi.connector.ConnectorSession)
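
The cursor falls back to Domain.all(...) whenever a column carries no constraint, then probes each candidate name with includesNullableValue before doing any costly metadata listing. A minimal sketch of those two calls (the schema names are illustrative):

import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.spi.type.VarcharType.createUnboundedVarcharType;

import io.trino.spi.predicate.Domain;

public class MetadataFilterSketch {
    public static void main(String[] args) {
        // No constraint on the column: Domain.all accepts every value
        Domain schemaDomain = Domain.all(createUnboundedVarcharType());
        System.out.println(schemaDomain.includesNullableValue(utf8Slice("tiny"))); // true

        // A single-value domain admits only the matching slice
        Domain onlyTiny = Domain.singleValue(createUnboundedVarcharType(), utf8Slice("tiny"));
        System.out.println(onlyTiny.includesNullableValue(utf8Slice("tiny"))); // true
        System.out.println(onlyTiny.includesNullableValue(utf8Slice("sf1"))); // false
    }
}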

Example 74 with Domain

Use of io.trino.spi.predicate.Domain in project trino by trinodb.

The class DynamicFilterService, method collectDynamicFilters:

private void collectDynamicFilters(StageId stageId, Optional<Set<DynamicFilterId>> selectedFilters) {
    DynamicFilterContext context = dynamicFilterContexts.get(stageId.getQueryId());
    if (context == null) {
        // query has been removed
        return;
    }
    OptionalInt stageNumberOfTasks = context.getNumberOfTasks(stageId);
    Map<DynamicFilterId, List<Domain>> newDynamicFilters = context.getTaskDynamicFilters(stageId, selectedFilters).entrySet().stream().filter(stageDomains -> {
        if (stageDomains.getValue().stream().anyMatch(Domain::isAll)) {
            // if one of the domains is all, we don't need to get dynamic filters from all tasks
            return true;
        }
        if (!stageDomains.getValue().isEmpty() && context.getReplicatedDynamicFilters().contains(stageDomains.getKey())) {
            // for replicated dynamic filters it's enough to get dynamic filter from a single task
            checkState(stageDomains.getValue().size() == 1, "Replicated dynamic filter should be collected from single task");
            return true;
        }
        // check if all tasks of a dynamic filter source have reported dynamic filter summary
        return stageNumberOfTasks.isPresent() && stageDomains.getValue().size() == stageNumberOfTasks.getAsInt();
    }).collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue));
    context.addDynamicFilters(newDynamicFilters);
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) QueryId(io.trino.spi.QueryId) PlanFragment(io.trino.sql.planner.PlanFragment) EMPTY(io.trino.spi.connector.DynamicFilter.EMPTY) Inject(com.google.inject.Inject) DynamicFilters.extractDynamicFilters(io.trino.sql.DynamicFilters.extractDynamicFilters) Duration.succinctNanos(io.airlift.units.Duration.succinctNanos) Domain.union(io.trino.spi.predicate.Domain.union) TypeOperators(io.trino.spi.type.TypeOperators) SettableFuture(com.google.common.util.concurrent.SettableFuture) Duration(io.airlift.units.Duration) PlanNode(io.trino.sql.planner.plan.PlanNode) PreDestroy(javax.annotation.PreDestroy) Sets.difference(com.google.common.collect.Sets.difference) HashMultimap(com.google.common.collect.HashMultimap) DynamicFilters.extractSourceSymbols(io.trino.sql.DynamicFilters.extractSourceSymbols) DynamicFilters(io.trino.sql.DynamicFilters) PlanNodeSearcher(io.trino.sql.planner.optimizations.PlanNodeSearcher) Map(java.util.Map) Sets.union(com.google.common.collect.Sets.union) ExpressionExtractor.extractExpressions(io.trino.sql.planner.ExpressionExtractor.extractExpressions) JoinNode(io.trino.sql.planner.plan.JoinNode) Functions.identity(com.google.common.base.Functions.identity) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Domain(io.trino.spi.predicate.Domain) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) SemiJoinNode(io.trino.sql.planner.plan.SemiJoinNode) ThreadSafe(javax.annotation.concurrent.ThreadSafe) GuardedBy(javax.annotation.concurrent.GuardedBy) TaskId(io.trino.execution.TaskId) JoinUtils(io.trino.operator.join.JoinUtils) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Objects(java.util.Objects) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) SubPlan(io.trino.sql.planner.SubPlan) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) MoreFutures.whenAnyComplete(io.airlift.concurrent.MoreFutures.whenAnyComplete) MoreFutures.unmodifiableFuture(io.airlift.concurrent.MoreFutures.unmodifiableFuture) Session(io.trino.Session) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) StageId(io.trino.execution.StageId) Type(io.trino.spi.type.Type) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Multimap(com.google.common.collect.Multimap) OptionalInt(java.util.OptionalInt) AtomicReference(java.util.concurrent.atomic.AtomicReference) DynamicFilterId(io.trino.sql.planner.plan.DynamicFilterId) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) DynamicFilterConfig(io.trino.execution.DynamicFilterConfig) ExecutorService(java.util.concurrent.ExecutorService) MoreFutures.toCompletableFuture(io.airlift.concurrent.MoreFutures.toCompletableFuture) Symbol(io.trino.sql.planner.Symbol) SqlQueryExecution(io.trino.execution.SqlQueryExecution) 
Sets.newConcurrentHashSet(com.google.common.collect.Sets.newConcurrentHashSet) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) SetMultimap(com.google.common.collect.SetMultimap) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) FunctionManager(io.trino.metadata.FunctionManager) Consumer(java.util.function.Consumer) MorePredicates.isInstanceOfAny(io.trino.util.MorePredicates.isInstanceOfAny) Sets.intersection(com.google.common.collect.Sets.intersection) DomainCoercer.applySaturatedCasts(io.trino.sql.planner.DomainCoercer.applySaturatedCasts) SOURCE_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.SOURCE_DISTRIBUTION) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) Metadata(io.trino.metadata.Metadata) TypeProvider(io.trino.sql.planner.TypeProvider) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
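
Two Domain properties drive the short-circuit logic above: an all() domain cannot be narrowed by any further union, and per-task summaries combine with the static Domain.union. A minimal sketch of both, using BIGINT values in place of real dynamic-filter domains:

import static io.trino.spi.type.BigintType.BIGINT;

import io.trino.spi.predicate.Domain;
import java.util.List;

public class DomainUnionSketch {
    public static void main(String[] args) {
        List<Domain> taskDomains = List.of(
                Domain.singleValue(BIGINT, 1L),
                Domain.singleValue(BIGINT, 2L));

        // As in collectDynamicFilters: once any task reports all(), waiting for the
        // remaining tasks is pointless, since the union can never shrink below all()
        if (taskDomains.stream().anyMatch(Domain::isAll)) {
            System.out.println("filter is ineffective");
            return;
        }

        // Otherwise the per-task summaries combine by union into the final filter domain
        Domain summary = Domain.union(taskDomains);
        System.out.println(summary); // values in {1, 2}
    }
}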

Example 75 with Domain

Use of io.trino.spi.predicate.Domain in project trino by trinodb.

The class DynamicFilterService, method translateSummaryToTupleDomain:

private TupleDomain<ColumnHandle> translateSummaryToTupleDomain(DynamicFilterId filterId, DynamicFilterContext dynamicFilterContext, Multimap<DynamicFilterId, DynamicFilters.Descriptor> descriptorMultimap, Map<Symbol, ColumnHandle> columnHandles, TypeProvider typeProvider) {
    Collection<DynamicFilters.Descriptor> descriptors = descriptorMultimap.get(filterId);
    checkState(descriptors != null, "No descriptors for dynamic filter %s", filterId);
    Domain summary = dynamicFilterContext.getDynamicFilterSummaries().get(filterId);
    return TupleDomain.withColumnDomains(descriptors.stream().collect(toImmutableMap(descriptor -> {
        Symbol probeSymbol = Symbol.from(descriptor.getInput());
        return requireNonNull(columnHandles.get(probeSymbol), () -> format("Missing probe column for %s", probeSymbol));
    }, descriptor -> {
        Type targetType = typeProvider.get(Symbol.from(descriptor.getInput()));
        Domain updatedSummary = descriptor.applyComparison(summary);
        if (!updatedSummary.getType().equals(targetType)) {
            return applySaturatedCasts(metadata, functionManager, typeOperators, dynamicFilterContext.getSession(), updatedSummary, targetType);
        }
        return updatedSummary;
    })));
}
Also used : Type(io.trino.spi.type.Type) Symbol(io.trino.sql.planner.Symbol) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain)
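
Once the per-task domains have been unioned into a single summary, this method re-keys that Domain by each probe-side ColumnHandle to produce a TupleDomain the target connector can consume, applying saturated casts only when the summary's type differs from the probe column's type. A minimal sketch of the re-keying step, with a String standing in for the ColumnHandle:

import static io.trino.spi.type.BigintType.BIGINT;

import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;
import java.util.Map;

public class SummaryTranslationSketch {
    public static void main(String[] args) {
        // The collected build-side summary, e.g. orderkey = 7
        Domain summary = Domain.singleValue(BIGINT, 7L);

        // Re-key by the probe-side column handle so the connector can apply the filter
        TupleDomain<String> probeFilter = TupleDomain.withColumnDomains(Map.of("orderkey", summary));
        System.out.println(probeFilter);
    }
}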

Aggregations

Domain (io.trino.spi.predicate.Domain): 120 usages
TupleDomain (io.trino.spi.predicate.TupleDomain): 107 usages
ColumnHandle (io.trino.spi.connector.ColumnHandle): 51 usages
Test (org.testng.annotations.Test): 45 usages
Map (java.util.Map): 38 usages
ImmutableList (com.google.common.collect.ImmutableList): 36 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 33 usages
List (java.util.List): 27 usages
Optional (java.util.Optional): 25 usages
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 23 usages
Type (io.trino.spi.type.Type): 23 usages
ConnectorSession (io.trino.spi.connector.ConnectorSession): 21 usages
SchemaTableName (io.trino.spi.connector.SchemaTableName): 21 usages
Objects.requireNonNull (java.util.Objects.requireNonNull): 21 usages
Set (java.util.Set): 20 usages
Range (io.trino.spi.predicate.Range): 19 usages
ValueSet (io.trino.spi.predicate.ValueSet): 18 usages
Constraint (io.trino.spi.connector.Constraint): 17 usages
String.format (java.lang.String.format): 17 usages
ImmutableSet (com.google.common.collect.ImmutableSet): 16 usages