Search in sources:

Example 26 with TupleDomain

use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.

the class TestOrcPageSourceFactory method readFile.

/**
 * Reads an ORC file through {@code PAGE_SOURCE_FACTORY} and converts every returned position into a {@link Nation}.
 *
 * <p>Columns that are not present in {@code columns} are filled with placeholder values
 * ({@code -42} for the numeric keys, {@code "<not read>"} for the string fields).
 *
 * @param columns the nation columns to read, each mapped to the integer passed to {@code toHiveColumnHandle}
 * @param nationKeyPredicate when present, a single-value domain on the nation key column is pushed to the reader
 * @param acidInfo ACID information forwarded unchanged to the page source factory
 * @param filePath path of the file to read
 * @param fileSize size of the file; used both as the split length and as the estimated file size
 * @return the rows read, in the order the page source produced them
 */
private static List<Nation> readFile(Map<NationColumn, Integer> columns, OptionalLong nationKeyPredicate, Optional<AcidInfo> acidInfo, String filePath, long fileSize) {
    // No predicate means "all rows"; otherwise constrain the nation key column to exactly one value.
    TupleDomain<HiveColumnHandle> tupleDomain = nationKeyPredicate.isPresent()
            ? TupleDomain.withColumnDomains(ImmutableMap.of(toHiveColumnHandle(NATION_KEY, 0), Domain.singleValue(INTEGER, nationKeyPredicate.getAsLong())))
            : TupleDomain.all();

    List<HiveColumnHandle> columnHandles = columns.entrySet().stream()
            .map(entry -> toHiveColumnHandle(entry.getKey(), entry.getValue()))
            .collect(toImmutableList());
    List<String> columnNames = columnHandles.stream()
            .map(HiveColumnHandle::getName)
            .collect(toImmutableList());

    Optional<ReaderPageSource> pageSourceWithProjections = PAGE_SOURCE_FACTORY.createPageSource(
            new JobConf(new Configuration(false)),
            SESSION,
            new Path(filePath),
            0,
            fileSize,
            fileSize,
            createSchema(),
            columnHandles,
            tupleDomain,
            acidInfo,
            OptionalInt.empty(),
            false,
            NO_ACID_TRANSACTION);
    checkArgument(pageSourceWithProjections.isPresent());
    checkArgument(pageSourceWithProjections.get().getReaderColumns().isEmpty(), "projected columns not expected here");
    ConnectorPageSource pageSource = pageSourceWithProjections.get().get();

    // Channel index of each field in the returned pages; -1 when the field was not requested.
    int nationKeyColumn = columnNames.indexOf("n_nationkey");
    int nameColumn = columnNames.indexOf("n_name");
    int regionKeyColumn = columnNames.indexOf("n_regionkey");
    int commentColumn = columnNames.indexOf("n_comment");

    ImmutableList.Builder<Nation> rows = ImmutableList.builder();
    while (!pageSource.isFinished()) {
        Page nextPage = pageSource.getNextPage();
        if (nextPage != null) {
            Page loaded = nextPage.getLoadedPage();
            int positionCount = loaded.getPositionCount();
            for (int position = 0; position < positionCount; position++) {
                long nationKey = nationKeyColumn >= 0 ? BIGINT.getLong(loaded.getBlock(nationKeyColumn), position) : -42;
                String name = nameColumn >= 0 ? VARCHAR.getSlice(loaded.getBlock(nameColumn), position).toStringUtf8() : "<not read>";
                long regionKey = regionKeyColumn >= 0 ? BIGINT.getLong(loaded.getBlock(regionKeyColumn), position) : -42;
                String comment = commentColumn >= 0 ? VARCHAR.getSlice(loaded.getBlock(commentColumn), position).toStringUtf8() : "<not read>";
                // The row number is the position within the current page, exactly as in the original loop.
                rows.add(new Nation(position, nationKey, name, regionKey, comment));
            }
        }
    }
    return rows.build();
}
Also used : URISyntaxException(java.net.URISyntaxException) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Assertions(org.assertj.core.api.Assertions) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) LongPredicate(java.util.function.LongPredicate) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) INTEGER(io.trino.spi.type.IntegerType.INTEGER) Assert.assertFalse(org.testng.Assert.assertFalse) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.nCopies(java.util.Collections.nCopies) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) AcidUtils.deleteDeltaSubdir(org.apache.hadoop.hive.ql.io.AcidUtils.deleteDeltaSubdir) REGION_KEY(io.trino.tpch.NationColumn.REGION_KEY) Nation(io.trino.tpch.Nation) NationGenerator(io.trino.tpch.NationGenerator) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) Resources.getResource(com.google.common.io.Resources.getResource) NATION_KEY(io.trino.tpch.NationColumn.NATION_KEY) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) HivePageSourceFactory(io.trino.plugin.hive.HivePageSourceFactory) NAME(io.trino.tpch.NationColumn.NAME) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) Assert.assertEquals(org.testng.Assert.assertEquals) OptionalInt(java.util.OptionalInt) ArrayList(java.util.ArrayList) OptionalLong(java.util.OptionalLong) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) 
ImmutableList(com.google.common.collect.ImmutableList) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) COMMENT(io.trino.tpch.NationColumn.COMMENT) NationColumn(io.trino.tpch.NationColumn) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) Properties(java.util.Properties) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) TABLE_IS_TRANSACTIONAL(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_TRANSACTIONAL) TupleDomain(io.trino.spi.predicate.TupleDomain) AcidInfo(io.trino.plugin.hive.AcidInfo) File(java.io.File) JobConf(org.apache.hadoop.mapred.JobConf) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) HiveConfig(io.trino.plugin.hive.HiveConfig) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) Path(org.apache.hadoop.fs.Path) Nation(io.trino.tpch.Nation) Configuration(org.apache.hadoop.conf.Configuration) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) Page(io.trino.spi.Page) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) JobConf(org.apache.hadoop.mapred.JobConf) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle)

Example 27 with TupleDomain

use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.

the class TestOrcPredicates method createPageSource.

/**
 * Builds a Hive page source over an ORC split for the given test columns.
 *
 * <p>The split properties list each distinct non-partition base column once; partition key columns are
 * turned into {@link HivePartitionKey}s and into a {@code name=value/...} partition name instead.
 *
 * @param effectivePredicate predicate keyed by test column; every key must match a requested column by name
 * @param columnsToRead columns to read, possibly including partition keys
 * @param session session handed to the page source provider
 * @param split file split whose path, start and length locate the data
 * @return the created page source; the method asserts that one was produced
 * @throws IllegalStateException if the predicate references a column that is not in {@code columnsToRead}
 */
private ConnectorPageSource createPageSource(TupleDomain<TestColumn> effectivePredicate, List<TestColumn> columnsToRead, ConnectorSession session, FileSplit split) {
    OrcPageSourceFactory readerFactory = new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC);

    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, ORC.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, ORC.getSerde());

    // The split properties describe full base columns, each listed once, excluding partition keys.
    ImmutableList.Builder<String> baseNames = ImmutableList.builder();
    ImmutableList.Builder<String> baseTypes = ImmutableList.builder();
    Set<String> seenBaseNames = new HashSet<>();
    for (TestColumn column : columnsToRead) {
        String baseName = column.getBaseName();
        if (seenBaseNames.contains(baseName) || column.isPartitionKey()) {
            continue;
        }
        seenBaseNames.add(baseName);
        baseNames.add(baseName);
        baseTypes.add(column.getBaseObjectInspector().getTypeName());
    }
    splitProperties.setProperty("columns", String.join(",", baseNames.build()));
    splitProperties.setProperty("columns.types", String.join(",", baseTypes.build()));

    List<HivePartitionKey> partitionKeys = columnsToRead.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue()))
            .collect(toList());
    String partitionName = partitionKeys.stream()
            .map(partitionKey -> format("%s=%s", partitionKey.getName(), partitionKey.getValue()))
            .collect(Collectors.joining("/"));

    List<HiveColumnHandle> columnHandles = getColumnHandles(columnsToRead);

    // Re-key the predicate from test columns to the Hive column handle with the same name.
    TupleDomain<HiveColumnHandle> predicate = effectivePredicate.transformKeys(testColumn -> columnHandles.stream()
            .filter(column -> testColumn.getName().equals(column.getName()))
            .findFirst()
            .orElseThrow(() -> new IllegalStateException("Predicate on invalid column")));

    List<HivePageSourceProvider.ColumnMapping> columnMappings = buildColumnMappings(
            partitionName,
            partitionKeys,
            columnHandles,
            ImmutableList.of(),
            TableToPartitionMapping.empty(),
            split.getPath(),
            OptionalInt.empty(),
            split.getLength(),
            Instant.now().toEpochMilli());

    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(readerFactory),
            ImmutableSet.of(),
            new Configuration(false),
            session,
            split.getPath(),
            OptionalInt.empty(),
            split.getStart(),
            split.getLength(),
            split.getLength(),
            splitProperties,
            predicate,
            columnHandles,
            TESTING_TYPE_MANAGER,
            Optional.empty(),
            Optional.empty(),
            false,
            Optional.empty(),
            false,
            NO_ACID_TRANSACTION,
            columnMappings);
    assertTrue(pageSource.isPresent());
    return pageSource.get();
}
Also used : HivePageSourceProvider(io.trino.plugin.hive.HivePageSourceProvider) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) FileSplit(org.apache.hadoop.mapred.FileSplit) Configuration(org.apache.hadoop.conf.Configuration) StructuralTestUtil.rowBlockOf(io.trino.testing.StructuralTestUtil.rowBlockOf) AbstractTestHiveFileFormats(io.trino.plugin.hive.AbstractTestHiveFileFormats) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) HiveCompressionCodec(io.trino.plugin.hive.HiveCompressionCodec) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) ImmutableSet(com.google.common.collect.ImmutableSet) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) ImmutableMap(com.google.common.collect.ImmutableMap) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) List(java.util.List) ColumnMapping.buildColumnMappings(io.trino.plugin.hive.HivePageSourceProvider.ColumnMapping.buildColumnMappings) TableToPartitionMapping(io.trino.plugin.hive.TableToPartitionMapping) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) Page(io.trino.spi.Page) Assert.assertEquals(org.testng.Assert.assertEquals) 
OptionalInt(java.util.OptionalInt) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) NodeVersion(io.trino.plugin.hive.NodeVersion) OrcReaderOptions(io.trino.orc.OrcReaderOptions) Properties(java.util.Properties) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) HivePartitionKey(io.trino.plugin.hive.HivePartitionKey) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) ConnectorSession(io.trino.spi.connector.ConnectorSession) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TupleDomain(io.trino.spi.predicate.TupleDomain) UTC(org.joda.time.DateTimeZone.UTC) File(java.io.File) HiveTestUtils.getHiveSession(io.trino.plugin.hive.HiveTestUtils.getHiveSession) Collectors.toList(java.util.stream.Collectors.toList) OrcWriterOptions(io.trino.orc.OrcWriterOptions) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) HiveConfig(io.trino.plugin.hive.HiveConfig) Configuration(org.apache.hadoop.conf.Configuration) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) Properties(java.util.Properties) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) OrcReaderOptions(io.trino.orc.OrcReaderOptions) HivePartitionKey(io.trino.plugin.hive.HivePartitionKey) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) HashSet(java.util.HashSet)

Example 28 with TupleDomain

use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.

the class Util method domainsMatch.

/**
 * Checks whether an actual tuple domain satisfies an expected one whose columns are given as matchers.
 *
 * <p>The two match when both have (or both lack) a domain map, the maps have the same size, and for every
 * expected entry the single actual column accepted by the matcher has a domain that contains the expected domain.
 *
 * @param expected expected domains keyed by column matcher
 * @param actual actual domains keyed by column handle
 * @return {@code true} if {@code actual} matches {@code expected}
 */
static boolean domainsMatch(TupleDomain<Predicate<ColumnHandle>> expected, TupleDomain<ColumnHandle> actual) {
    Optional<Map<Predicate<ColumnHandle>, Domain>> expectedDomains = expected.getDomains();
    Optional<Map<ColumnHandle, Domain>> actualDomains = actual.getDomains();

    // One side has a domain map and the other does not.
    if (expectedDomains.isPresent() != actualDomains.isPresent()) {
        return false;
    }
    // Neither side has a domain map: nothing further to compare.
    if (!expectedDomains.isPresent()) {
        return true;
    }

    Map<Predicate<ColumnHandle>, Domain> expectedByMatcher = expectedDomains.get();
    Map<ColumnHandle, Domain> actualByColumn = actualDomains.get();
    if (expectedByMatcher.size() != actualByColumn.size()) {
        return false;
    }

    for (Map.Entry<Predicate<ColumnHandle>, Domain> expectedEntry : expectedByMatcher.entrySet()) {
        Predicate<ColumnHandle> matcher = expectedEntry.getKey();
        // getOnlyElement enforces that exactly one actual column satisfies the matcher.
        ColumnHandle actualColumn = Iterables.getOnlyElement(
                actualByColumn.keySet().stream()
                        .filter(matcher)
                        .collect(toImmutableList()));
        Domain actualDomain = actualByColumn.get(actualColumn);
        if (!actualDomain.contains(expectedEntry.getValue())) {
            return false;
        }
    }
    return true;
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) Map(java.util.Map) Predicate(java.util.function.Predicate)

Example 29 with TupleDomain

use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.

the class HiveMetastoreBackedDeltaLakeMetastore method getTableStatistics.

/**
 * Computes table statistics by aggregating the per-file statistics of the active files in the table snapshot.
 *
 * <p>Outcomes, in the order they are checked:
 * <ul>
 * <li>zero statistics when the enforced partition constraint, the non-partition constraint or the
 * constraint summary is none, or when no matching file contributes any records;</li>
 * <li>{@code TableStatistics.empty()} as soon as an active file has no statistics (checked before the file is
 * filtered by partition), or a matching file has statistics without a record count;</li>
 * <li>otherwise a row count plus, per column, a nulls fraction, an optional value range and an optional
 * distinct-values count.</li>
 * </ul>
 *
 * @param session session used to read the snapshot, the transaction log and the extended statistics
 * @param tableHandle table whose statistics are requested; supplies the constraints used to filter files
 * @param constraint additional constraint; only its summary is checked for being none
 * @return the aggregated statistics as described above
 * @throws TrinoException with {@code DELTA_LAKE_INVALID_SCHEMA} if the transaction log has no metadata entry
 */
@Override
public TableStatistics getTableStatistics(ConnectorSession session, DeltaLakeTableHandle tableHandle, Constraint constraint) {
    TableSnapshot tableSnapshot = getSnapshot(tableHandle.getSchemaTableName(), session);
    double numRecords = 0L;
    MetadataEntry metadata = transactionLogAccess.getMetadataEntry(tableSnapshot, session).orElseThrow(() -> new TrinoException(DELTA_LAKE_INVALID_SCHEMA, "Metadata not found in transaction log for " + tableHandle.getTableName()));
    List<ColumnMetadata> columnMetadata = DeltaLakeSchemaSupport.extractSchema(metadata, typeManager);
    // A column is a partition key when its name appears in the metadata's canonical partition columns
    List<DeltaLakeColumnHandle> columns = columnMetadata.stream().map(columnMeta -> new DeltaLakeColumnHandle(columnMeta.getName(), columnMeta.getType(), metadata.getCanonicalPartitionColumns().contains(columnMeta.getName()) ? PARTITION_KEY : REGULAR)).collect(toImmutableList());
    // Per-column accumulators; null counts start at zero for every column, min/max are filled lazily
    Map<DeltaLakeColumnHandle, Double> nullCounts = new HashMap<>();
    columns.forEach(column -> nullCounts.put(column, 0.0));
    Map<DeltaLakeColumnHandle, Double> minValues = new HashMap<>();
    Map<DeltaLakeColumnHandle, Double> maxValues = new HashMap<>();
    // Distinct partition values seen per partition column, used later as the distinct-values count
    Map<DeltaLakeColumnHandle, Set<String>> partitioningColumnsDistinctValues = new HashMap<>();
    columns.stream().filter(column -> column.getColumnType() == PARTITION_KEY).forEach(column -> partitioningColumnsDistinctValues.put(column, new HashSet<>()));
    if (tableHandle.getEnforcedPartitionConstraint().isNone() || tableHandle.getNonPartitionConstraint().isNone() || constraint.getSummary().isNone()) {
        return createZeroStatistics(columns);
    }
    // NOTE(review): the orElseThrow() calls on getDomains() below presumably cannot fail because none-constraints returned above - confirm
    Set<String> predicatedColumnNames = tableHandle.getNonPartitionConstraint().getDomains().orElseThrow().keySet().stream().map(DeltaLakeColumnHandle::getName).collect(toImmutableSet());
    List<ColumnMetadata> predicatedColumns = columnMetadata.stream().filter(column -> predicatedColumnNames.contains(column.getName())).collect(toImmutableList());
    for (AddFileEntry addEntry : transactionLogAccess.getActiveFiles(tableSnapshot, session)) {
        Optional<? extends DeltaLakeFileStatistics> fileStatistics = addEntry.getStats();
        if (fileStatistics.isEmpty()) {
            // Open source Delta Lake does not collect stats
            return TableStatistics.empty();
        }
        DeltaLakeFileStatistics stats = fileStatistics.get();
        // Skip files whose partition values do not satisfy the enforced partition constraint
        if (!partitionMatchesPredicate(addEntry.getCanonicalPartitionValues(), tableHandle.getEnforcedPartitionConstraint().getDomains().orElseThrow())) {
            continue;
        }
        // Skip files whose statistics predicate does not overlap the non-partition constraint
        TupleDomain<DeltaLakeColumnHandle> statisticsPredicate = createStatisticsPredicate(addEntry, predicatedColumns, tableHandle.getMetadataEntry().getCanonicalPartitionColumns());
        if (!tableHandle.getNonPartitionConstraint().overlaps(statisticsPredicate)) {
            continue;
        }
        if (stats.getNumRecords().isEmpty()) {
            // Not clear if it's possible for stats to be present with no row count, but bail out if that happens
            return TableStatistics.empty();
        }
        numRecords += stats.getNumRecords().get();
        for (DeltaLakeColumnHandle column : columns) {
            if (column.getColumnType() == PARTITION_KEY) {
                Optional<String> partitionValue = addEntry.getCanonicalPartitionValues().get(column.getName());
                if (partitionValue.isEmpty()) {
                    // An absent partition value counts every record of this file as null for the column
                    nullCounts.merge(column, (double) stats.getNumRecords().get(), Double::sum);
                } else {
                    // NULL is not counted as a distinct value
                    // Code below assumes that values returned by addEntry.getCanonicalPartitionValues() are normalized,
                    // it may not be true in case of real, doubles, timestamps etc
                    partitioningColumnsDistinctValues.get(column).add(partitionValue.get());
                }
            } else {
                Optional<Long> maybeNullCount = stats.getNullCount(column.getName());
                if (maybeNullCount.isPresent()) {
                    // If the running total is already NaN, the sum stays NaN
                    nullCounts.put(column, nullCounts.get(column) + maybeNullCount.get());
                } else {
                    // If any individual file fails to report null counts, fail to calculate the total for the table
                    nullCounts.put(column, NaN);
                }
            }
            // Math.min returns NaN if any operand is NaN
            stats.getMinColumnValue(column).map(parsedValue -> toStatsRepresentation(column.getType(), parsedValue)).filter(OptionalDouble::isPresent).map(OptionalDouble::getAsDouble).ifPresent(parsedValueAsDouble -> minValues.merge(column, parsedValueAsDouble, Math::min));
            stats.getMaxColumnValue(column).map(parsedValue -> toStatsRepresentation(column.getType(), parsedValue)).filter(OptionalDouble::isPresent).map(OptionalDouble::getAsDouble).ifPresent(parsedValueAsDouble -> maxValues.merge(column, parsedValueAsDouble, Math::max));
        }
    }
    // No matching file contributed any records
    if (numRecords == 0) {
        return createZeroStatistics(columns);
    }
    TableStatistics.Builder statsBuilder = new TableStatistics.Builder().setRowCount(Estimate.of(numRecords));
    // Extended statistics are only read when enabled for the session
    Optional<DeltaLakeStatistics> statistics = Optional.empty();
    if (isExtendedStatisticsEnabled(session)) {
        statistics = statisticsAccess.readDeltaLakeStatistics(session, tableHandle.getLocation());
    }
    for (DeltaLakeColumnHandle column : columns) {
        ColumnStatistics.Builder columnStatsBuilder = new ColumnStatistics.Builder();
        Double nullCount = nullCounts.get(column);
        columnStatsBuilder.setNullsFraction(nullCount.isNaN() ? Estimate.unknown() : Estimate.of(nullCount / numRecords));
        // minValue/maxValue are null when no file reported a usable value for the column
        Double maxValue = maxValues.get(column);
        Double minValue = minValues.get(column);
        // Use both bounds when valid; otherwise leave the missing side open-ended, or set no range at all
        if (isValidInRange(maxValue) && isValidInRange(minValue)) {
            columnStatsBuilder.setRange(new DoubleRange(minValue, maxValue));
        } else if (isValidInRange(maxValue)) {
            columnStatsBuilder.setRange(new DoubleRange(NEGATIVE_INFINITY, maxValue));
        } else if (isValidInRange(minValue)) {
            columnStatsBuilder.setRange(new DoubleRange(minValue, POSITIVE_INFINITY));
        }
        // extend statistics with NDV
        if (column.getColumnType() == PARTITION_KEY) {
            columnStatsBuilder.setDistinctValuesCount(Estimate.of(partitioningColumnsDistinctValues.get(column).size()));
        }
        if (statistics.isPresent()) {
            DeltaLakeColumnStatistics deltaLakeColumnStatistics = statistics.get().getColumnStatistics().get(column.getName());
            // Extended statistics supply the distinct-values count only for non-partition columns
            if (deltaLakeColumnStatistics != null && column.getColumnType() != PARTITION_KEY) {
                columnStatsBuilder.setDistinctValuesCount(Estimate.of(deltaLakeColumnStatistics.getNdvSummary().cardinality()));
            }
        }
        statsBuilder.setColumnStatistics(column, columnStatsBuilder.build());
    }
    return statsBuilder.build();
}
Also used : DeltaLakeStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeStatistics) POSITIVE_INFINITY(java.lang.Double.POSITIVE_INFINITY) PATH_PROPERTY(io.trino.plugin.deltalake.DeltaLakeMetadata.PATH_PROPERTY) TableSnapshot(io.trino.plugin.deltalake.transactionlog.TableSnapshot) Database(io.trino.plugin.hive.metastore.Database) NEGATIVE_INFINITY(java.lang.Double.NEGATIVE_INFINITY) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) TransactionLogAccess(io.trino.plugin.deltalake.transactionlog.TransactionLogAccess) StatsUtil.toStatsRepresentation(io.trino.spi.statistics.StatsUtil.toStatsRepresentation) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) DeltaLakeMetadata.createStatisticsPredicate(io.trino.plugin.deltalake.DeltaLakeMetadata.createStatisticsPredicate) NaN(java.lang.Double.NaN) DeltaLakeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) DeltaLakeTableHandle(io.trino.plugin.deltalake.DeltaLakeTableHandle) Table(io.trino.plugin.hive.metastore.Table) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) Set(java.util.Set) DeltaLakeSplitManager.partitionMatchesPredicate(io.trino.plugin.deltalake.DeltaLakeSplitManager.partitionMatchesPredicate) DeltaLakeSchemaSupport(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport) TrinoException(io.trino.spi.TrinoException) SchemaTableName(io.trino.spi.connector.SchemaTableName) String.format(java.lang.String.format) List(java.util.List) Optional(java.util.Optional) REGULAR(io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR) DoubleRange(io.trino.spi.statistics.DoubleRange) Constraint(io.trino.spi.connector.Constraint) DeltaLakeFileStatistics(io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics) 
CachingDeltaLakeStatisticsAccess(io.trino.plugin.deltalake.statistics.CachingDeltaLakeStatisticsAccess) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) DeltaLakeColumnStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeColumnStatistics) DELTA_LAKE_INVALID_TABLE(io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_TABLE) OptionalDouble(java.util.OptionalDouble) HashMap(java.util.HashMap) HashSet(java.util.HashSet) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) Objects.requireNonNull(java.util.Objects.requireNonNull) TableStatistics(io.trino.spi.statistics.TableStatistics) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) DeltaLakeSessionProperties.isExtendedStatisticsEnabled(io.trino.plugin.deltalake.DeltaLakeSessionProperties.isExtendedStatisticsEnabled) Estimate(io.trino.spi.statistics.Estimate) ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) PARTITION_KEY(io.trino.plugin.deltalake.DeltaLakeColumnType.PARTITION_KEY) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) DELTA_LAKE_INVALID_SCHEMA(io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA) ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) TypeManager(io.trino.spi.type.TypeManager) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) Set(java.util.Set) HashSet(java.util.HashSet) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) HashMap(java.util.HashMap) DeltaLakeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle) DeltaLakeStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeStatistics) DeltaLakeColumnStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeColumnStatistics) HashSet(java.util.HashSet) DeltaLakeColumnStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeColumnStatistics) 
ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) DeltaLakeFileStatistics(io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics) OptionalDouble(java.util.OptionalDouble) OptionalDouble(java.util.OptionalDouble) DoubleRange(io.trino.spi.statistics.DoubleRange) TableSnapshot(io.trino.plugin.deltalake.transactionlog.TableSnapshot) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) TrinoException(io.trino.spi.TrinoException) TableStatistics(io.trino.spi.statistics.TableStatistics)

Example 30 with TupleDomain

use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.

the class RemoveRedundantPredicateAboveTableScan method apply.

/**
 * Removes from a filter above a table scan those conjuncts that the scan's enforced constraint already guarantees.
 *
 * <p>The deterministic part of the filter predicate is converted to a tuple domain and compared, column by
 * column, with the scan's enforced constraint. The rule then returns:
 * <ul>
 * <li>an empty result when nothing could be converted, or when the comparison leaves the predicate domain unchanged;</li>
 * <li>an empty {@link ValuesNode} when either the predicate domain or the enforced constraint is none;</li>
 * <li>the bare table scan when the remaining predicate is {@code TRUE_LITERAL};</li>
 * <li>otherwise a new {@link FilterNode} with the remaining predicate on top of the scan.</li>
 * </ul>
 *
 * @param filterNode the matched filter
 * @param captures captures holding the table scan under {@code TABLE_SCAN}
 * @param context rule context supplying the session, symbol allocator and id allocator
 * @return the rewrite result as described above
 */
@Override
public Result apply(FilterNode filterNode, Captures captures, Context context) {
    TableScanNode node = captures.get(TABLE_SCAN);
    Session session = context.getSession();

    Expression predicate = filterNode.getPredicate();
    Expression deterministicPredicate = filterDeterministicConjuncts(plannerContext.getMetadata(), predicate);
    Expression nonDeterministicPredicate = filterNonDeterministicConjuncts(plannerContext.getMetadata(), predicate);

    ExtractionResult decomposedPredicate = getFullyExtractedPredicates(session, deterministicPredicate, context.getSymbolAllocator().getTypes());
    if (decomposedPredicate.getTupleDomain().isAll()) {
        // no conjunct could be fully converted to tuple domain
        return Result.empty();
    }

    // Re-key the extracted domain through the scan's assignments so it is expressed on column handles.
    TupleDomain<ColumnHandle> predicateDomain = decomposedPredicate.getTupleDomain().transformKeys(node.getAssignments()::get);

    // A none predicate domain or a none enforced constraint both replace the subtree with an empty Values node.
    if (predicateDomain.isNone() || node.getEnforcedConstraint().isNone()) {
        return Result.ofPlanNode(new ValuesNode(node.getId(), node.getOutputSymbols(), ImmutableList.of()));
    }

    // The none case returned above, so the enforced constraint is expected to carry column domains here.
    Map<ColumnHandle, Domain> enforcedColumnDomains = node.getEnforcedConstraint().getDomains().orElseThrow();
    TupleDomain<ColumnHandle> unenforcedDomain = predicateDomain.transformDomains((columnHandle, predicateColumnDomain) -> {
        Type type = predicateColumnDomain.getType();
        Domain enforced = enforcedColumnDomains.get(columnHandle);
        // A column without an enforced domain is treated as unconstrained.
        Domain enforcedColumnDomain = (enforced != null) ? enforced : Domain.all(type);
        if (predicateColumnDomain.contains(enforcedColumnDomain)) {
            // The enforced domain lies within the predicate's domain, so this column needs no filtering.
            return Domain.all(type);
        }
        return predicateColumnDomain.intersect(enforcedColumnDomain);
    });

    if (unenforcedDomain.equals(predicateDomain)) {
        // no change in filter predicate
        return Result.empty();
    }

    Map<ColumnHandle, Symbol> assignments = ImmutableBiMap.copyOf(node.getAssignments()).inverse();
    Expression resultingPredicate = createResultingPredicate(
            plannerContext,
            session,
            context.getSymbolAllocator(),
            typeAnalyzer,
            // Dynamic filters are included in decomposedPredicate.getRemainingExpression()
            TRUE_LITERAL,
            new DomainTranslator(plannerContext).toPredicate(session, unenforcedDomain.transformKeys(assignments::get)),
            nonDeterministicPredicate,
            decomposedPredicate.getRemainingExpression());

    if (TRUE_LITERAL.equals(resultingPredicate)) {
        // Nothing is left to filter: return the scan on its own.
        return Result.ofPlanNode(node);
    }
    return Result.ofPlanNode(new FilterNode(context.getIdAllocator().getNextId(), node, resultingPredicate));
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) ValuesNode(io.trino.sql.planner.plan.ValuesNode) Symbol(io.trino.sql.planner.Symbol) FilterNode(io.trino.sql.planner.plan.FilterNode) Type(io.trino.spi.type.Type) TableScanNode(io.trino.sql.planner.plan.TableScanNode) Expression(io.trino.sql.tree.Expression) DomainTranslator(io.trino.sql.planner.DomainTranslator) ExtractionResult(io.trino.sql.planner.DomainTranslator.ExtractionResult) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) Session(io.trino.Session)

Aggregations

TupleDomain (io.trino.spi.predicate.TupleDomain)97 Domain (io.trino.spi.predicate.Domain)77 Map (java.util.Map)50 ColumnHandle (io.trino.spi.connector.ColumnHandle)48 ImmutableMap (com.google.common.collect.ImmutableMap)43 ImmutableList (com.google.common.collect.ImmutableList)41 List (java.util.List)40 Optional (java.util.Optional)36 Set (java.util.Set)33 Test (org.testng.annotations.Test)33 Objects.requireNonNull (java.util.Objects.requireNonNull)32 ConnectorSession (io.trino.spi.connector.ConnectorSession)29 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)28 ImmutableSet (com.google.common.collect.ImmutableSet)26 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)23 Range (io.trino.spi.predicate.Range)22 String.format (java.lang.String.format)22 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)20 TrinoException (io.trino.spi.TrinoException)20 Type (io.trino.spi.type.Type)19