Search in sources :

Example 51 with Domain

use of io.trino.spi.predicate.Domain in project trino by trinodb.

the class KafkaFilterManager method filterRangeByDomain.

/**
 * Derives a {@link Range} of long values from the given domain.
 *
 * <ul>
 *   <li>A single-value domain yields that value as both bounds.</li>
 *   <li>A {@link SortedRangeSet} made only of single values yields the minimum and maximum of those values.</li>
 *   <li>Any other {@link SortedRangeSet} yields the included low/high values of its span, when available.</li>
 *   <li>Otherwise both bounds stay at {@code INVALID_KAFKA_RANGE_INDEX}.</li>
 * </ul>
 *
 * A high bound that was actually set is incremented by one before the range is built
 * (presumably because the resulting range treats its end as exclusive; confirm against {@code Range}).
 *
 * @param domain the domain to convert; values are cast to {@code long}
 * @return an always-present {@link Optional} holding the derived range
 */
@VisibleForTesting
public static Optional<Range> filterRangeByDomain(Domain domain) {
    // Primitive longs keep the sentinel check below a plain numeric comparison
    // (never a boxed-reference comparison) and avoid needless boxing.
    long low = INVALID_KAFKA_RANGE_INDEX;
    long high = INVALID_KAFKA_RANGE_INDEX;
    if (domain.isSingleValue()) {
        // still return range for single value case like (_partition_offset=XXX or _timestamp=XXX)
        long singleValue = (long) domain.getSingleValue();
        low = singleValue;
        high = singleValue;
    } else {
        ValueSet valueSet = domain.getValues();
        if (valueSet instanceof SortedRangeSet) {
            // still return range for single value case like (_partition_offset in (XXX1,XXX2) or _timestamp in XXX1, XXX2)
            Ranges ranges = ((SortedRangeSet) valueSet).getRanges();
            List<io.trino.spi.predicate.Range> rangeList = ranges.getOrderedRanges();
            if (rangeList.stream().allMatch(io.trino.spi.predicate.Range::isSingleValue)) {
                List<Long> values = rangeList.stream().map(range -> (Long) range.getSingleValue()).collect(toImmutableList());
                low = Collections.min(values);
                high = Collections.max(values);
            } else {
                io.trino.spi.predicate.Range span = ranges.getSpan();
                low = getLowIncludedValue(span).orElse(low);
                high = getHighIncludedValue(span).orElse(high);
            }
        }
    }
    // Only a high bound that was actually derived is shifted; the sentinel is passed through untouched.
    if (high != INVALID_KAFKA_RANGE_INDEX) {
        high = high + 1;
    }
    return Optional.of(new Range(low, high));
}
Also used : Config(org.apache.kafka.clients.admin.Config) DescribeConfigsResult(org.apache.kafka.clients.admin.DescribeConfigsResult) MICROSECONDS_PER_MILLISECOND(io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND) Type(io.trino.spi.type.Type) OFFSET_TIMESTAMP_FIELD(io.trino.plugin.kafka.KafkaInternalFieldManager.OFFSET_TIMESTAMP_FIELD) Function(java.util.function.Function) PARTITION_ID_FIELD(io.trino.plugin.kafka.KafkaInternalFieldManager.PARTITION_ID_FIELD) TimestampType(io.trino.spi.type.TimestampType) Inject(javax.inject.Inject) ConfigResource(org.apache.kafka.common.config.ConfigResource) Verify.verify(com.google.common.base.Verify.verify) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) Admin(org.apache.kafka.clients.admin.Admin) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) KAFKA_SPLIT_ERROR(io.trino.plugin.kafka.KafkaErrorCode.KAFKA_SPLIT_ERROR) Math.floorDiv(java.lang.Math.floorDiv) TopicPartition(org.apache.kafka.common.TopicPartition) PARTITION_OFFSET_FIELD(io.trino.plugin.kafka.KafkaInternalFieldManager.PARTITION_OFFSET_FIELD) ImmutableMap(com.google.common.collect.ImmutableMap) Ranges(io.trino.spi.predicate.Ranges) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) ConnectorSession(io.trino.spi.connector.ConnectorSession) PartitionInfo(org.apache.kafka.common.PartitionInfo) TupleDomain(io.trino.spi.predicate.TupleDomain) OffsetAndTimestamp(org.apache.kafka.clients.consumer.OffsetAndTimestamp) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) 
VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) SortedRangeSet(io.trino.spi.predicate.SortedRangeSet) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) Ranges(io.trino.spi.predicate.Ranges) SortedRangeSet(io.trino.spi.predicate.SortedRangeSet) ValueSet(io.trino.spi.predicate.ValueSet) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 52 with Domain

use of io.trino.spi.predicate.Domain in project trino by trinodb.

the class TestPrometheusSplit method testPredicatePushDownSetsUpperBoundOnly.

/**
 * Builds a table handle whose predicate only has an upper bound on the timestamp column,
 * generates split times for it, and checks that the last split lands exactly on that upper
 * bound and that the covered duration matches the expected maximum query range.
 */
@Test
public void testPredicatePushDownSetsUpperBoundOnly() {
    long predicateHighValue = 1568638171999L;

    // Predicate: timestamp <= predicateHighValue, nulls not allowed
    Range upperBound = Range.lessThanOrEqual(TIMESTAMP_COLUMN_TYPE, packDateTimeWithZone(predicateHighValue, UTC_KEY));
    Domain upperBoundDomain = Domain.create(ValueSet.ofRanges(upperBound), false);
    ColumnHandle timestampColumn = new PrometheusColumnHandle("timestamp", TIMESTAMP_COLUMN_TYPE, 2);
    TupleDomain<ColumnHandle> predicate = TupleDomain.withColumnDomains(ImmutableMap.of(timestampColumn, upperBoundDomain));
    PrometheusTableHandle tableHandle = new PrometheusTableHandle("schemaName", "tableName").withPredicate(predicate);

    // Split generation inputs: "now" is 600000 ms after the predicate's upper bound
    io.airlift.units.Duration maxQueryRangeDuration = new io.airlift.units.Duration(120, TimeUnit.SECONDS);
    io.airlift.units.Duration queryChunkSizeDuration = new io.airlift.units.Duration(30, TimeUnit.SECONDS);
    Instant now = ofEpochMilli(predicateHighValue + 600000L);
    List<String> splitTimes = PrometheusSplitManager.generateTimesForSplits(now, maxQueryRangeDuration, queryChunkSizeDuration, tableHandle);

    int lastIndex = splitTimes.size() - 1;
    Instant firstSplitTime = ofEpochMilli(longFromDecimalSecondString(splitTimes.get(0)));
    Instant lastSplitTime = ofEpochMilli(longFromDecimalSecondString(splitTimes.get(lastIndex)));

    TemporalAmount expectedMaxQueryAsTime = java.time.Duration.ofMillis(maxQueryRangeDuration.toMillis() + ((splitTimes.size() - 1) * OFFSET_MILLIS));
    TemporalAmount chunkAsTime = java.time.Duration.ofMillis(queryChunkSizeDuration.toMillis());
    java.time.Duration actualMaxDuration = Duration.between(firstSplitTime.minus(chunkAsTime), lastSplitTime);

    assertEquals(lastSplitTime.toEpochMilli(), predicateHighValue);
    assertEquals(actualMaxDuration, expectedMaxQueryAsTime);
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) Instant(java.time.Instant) Duration(java.time.Duration) PrometheusSplitManager.decimalSecondString(io.trino.plugin.prometheus.PrometheusSplitManager.decimalSecondString) Range(io.trino.spi.predicate.Range) TemporalAmount(java.time.temporal.TemporalAmount) Duration(java.time.Duration) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) ValueSet(io.trino.spi.predicate.ValueSet) Test(org.testng.annotations.Test)

Example 53 with Domain

use of io.trino.spi.predicate.Domain in project trino by trinodb.

the class ShardMetadataRecordCursor method getTableIds.

/**
 * Looks up the ids of tables matching the schema-name and table-name domains of the given tuple domain.
 *
 * <p>Only single-value domains are turned into SQL equality predicates. A domain that is present
 * but not a single value contributes no predicate, so the query is not narrowed by that column.
 *
 * @param dbi source of the database connection
 * @param tupleDomain constraints keyed by column index; its domains must be present
 * @return an iterator over the {@code table_id} values returned by the query
 */
@VisibleForTesting
static Iterator<Long> getTableIds(Jdbi dbi, TupleDomain<Integer> tupleDomain) {
    Map<Integer, Domain> domains = tupleDomain.getDomains().get();
    Domain schemaNameDomain = domains.get(getColumnIndex(SHARD_METADATA, SCHEMA_NAME));
    Domain tableNameDomain = domains.get(getColumnIndex(SHARD_METADATA, TABLE_NAME));

    // Collect predicates and their bind values together so the placeholder order matches.
    List<String> predicates = new ArrayList<>();
    List<String> values = new ArrayList<>();
    if (tableNameDomain != null && tableNameDomain.isSingleValue()) {
        predicates.add("table_name = ?");
        values.add(getStringValue(tableNameDomain.getSingleValue()));
    }
    if (schemaNameDomain != null && schemaNameDomain.isSingleValue()) {
        predicates.add("schema_name = ?");
        values.add(getStringValue(schemaNameDomain.getSingleValue()));
    }

    StringBuilder sql = new StringBuilder("SELECT table_id FROM tables ");
    // Emit WHERE only when there is something to filter on: a non-single-value domain
    // would otherwise leave a dangling "WHERE " and produce invalid SQL.
    if (!predicates.isEmpty()) {
        sql.append("WHERE ").append(Joiner.on(" AND ").join(predicates));
    }

    ImmutableList.Builder<Long> tableIds = ImmutableList.builder();
    try (Connection connection = dbi.open().getConnection();
        PreparedStatement statement = connection.prepareStatement(sql.toString())) {
        for (int i = 0; i < values.size(); i++) {
            // JDBC parameters are 1-based
            statement.setString(i + 1, values.get(i));
        }
        try (ResultSet resultSet = statement.executeQuery()) {
            while (resultSet.next()) {
                tableIds.add(resultSet.getLong("table_id"));
            }
        }
    } catch (SQLException | JdbiException e) {
        throw metadataError(e);
    }
    return tableIds.build().iterator();
}
Also used : SQLException(java.sql.SQLException) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) Connection(java.sql.Connection) PreparedStatement(java.sql.PreparedStatement) ResultSet(java.sql.ResultSet) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) JdbiException(org.jdbi.v3.core.JdbiException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 54 with Domain

use of io.trino.spi.predicate.Domain in project trino by trinodb.

the class BaseSqlServerConnectorTest method testPredicatePushdown.

/**
 * Runs a series of filtered queries and asserts, for each one, both the returned rows and
 * whether the plan is fully pushed down into the connector or still contains a {@code FilterNode}.
 * Covers varchar, bigint, date, aggregation-related and decimal predicates, including cases where
 * the {@code domain_compaction_threshold} session property of the {@code sqlserver} catalog is set to 1.
 */
@Test
public void testPredicatePushdown() {
    // varchar equality
    assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE name = 'ROMANIA'")).matches("VALUES (BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25)))").isNotFullyPushedDown(FilterNode.class);
    // varchar range
    assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE name BETWEEN 'POLAND' AND 'RPA'")).matches("VALUES (BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25)))").isNotFullyPushedDown(FilterNode.class);
    // varchar IN without domain compaction
    assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE name IN ('POLAND', 'ROMANIA', 'VIETNAM')")).matches("VALUES " + "(BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25))), " + "(BIGINT '2', BIGINT '21', CAST('VIETNAM' AS varchar(25)))").isNotFullyPushedDown(node(FilterNode.class, // verify that pushed down constraint is applied by the connector
    tableScan(tableHandle -> {
        // The table handle's constraint must carry exactly the three IN-list values for the "name" column
        TupleDomain<ColumnHandle> constraint = ((JdbcTableHandle) tableHandle).getConstraint();
        ColumnHandle nameColumn = constraint.getDomains().orElseThrow().keySet().stream().map(JdbcColumnHandle.class::cast).filter(column -> column.getColumnName().equals("name")).collect(onlyElement());
        return constraint.getDomains().get().get(nameColumn).equals(Domain.multipleValues(createVarcharType(25), ImmutableList.of(utf8Slice("POLAND"), utf8Slice("ROMANIA"), utf8Slice("VIETNAM"))));
    }, TupleDomain.all(), ImmutableMap.of())));
    // varchar IN with small compaction threshold
    assertThat(query(Session.builder(getSession()).setCatalogSessionProperty("sqlserver", "domain_compaction_threshold", "1").build(), "SELECT regionkey, nationkey, name FROM nation WHERE name IN ('POLAND', 'ROMANIA', 'VIETNAM')")).matches("VALUES " + "(BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25))), " + "(BIGINT '2', BIGINT '21', CAST('VIETNAM' AS varchar(25)))").isNotFullyPushedDown(node(FilterNode.class, // verify that no constraint is applied by the connector
    tableScan(tableHandle -> ((JdbcTableHandle) tableHandle).getConstraint().isAll(), TupleDomain.all(), ImmutableMap.of())));
    // varchar different case
    assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE name = 'romania'")).returnsEmptyResult().isNotFullyPushedDown(FilterNode.class);
    // bigint equality
    assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE nationkey = 19")).matches("VALUES (BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25)))").isFullyPushedDown();
    // bigint IN with small compaction threshold
    assertThat(query(Session.builder(getSession()).setCatalogSessionProperty("sqlserver", "domain_compaction_threshold", "1").build(), "SELECT regionkey, nationkey, name FROM nation WHERE nationkey IN (19, 21)")).matches("VALUES " + "(BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25))), " + "(BIGINT '2', BIGINT '21', CAST('VIETNAM' AS varchar(25)))").isNotFullyPushedDown(FilterNode.class);
    // bigint range, with decimal to bigint simplification
    assertThat(query("SELECT regionkey, nationkey, name FROM nation WHERE nationkey BETWEEN 18.5 AND 19.5")).matches("VALUES (BIGINT '3', BIGINT '19', CAST('ROMANIA' AS varchar(25)))").isFullyPushedDown();
    // date equality
    assertThat(query("SELECT orderkey FROM orders WHERE orderdate = DATE '1992-09-29'")).matches("VALUES BIGINT '1250', 34406, 38436, 57570").isFullyPushedDown();
    // predicate over aggregation key (likely to be optimized before being pushed down into the connector)
    assertThat(query("SELECT * FROM (SELECT regionkey, sum(nationkey) FROM nation GROUP BY regionkey) WHERE regionkey = 3")).matches("VALUES (BIGINT '3', BIGINT '77')").isFullyPushedDown();
    // predicate over aggregation result
    assertThat(query("SELECT regionkey, sum(nationkey) FROM nation GROUP BY regionkey HAVING sum(nationkey) = 77")).matches("VALUES (BIGINT '3', BIGINT '77')").isFullyPushedDown();
    // decimals: a single-row table with one short and one long decimal column; the table is dropped when the try block exits
    try (TestTable testTable = new TestTable(onRemoteDatabase(), "test_decimal_pushdown", "(short_decimal decimal(9, 3), long_decimal decimal(30, 10))", List.of("123.321, 123456789.987654321"))) {
        assertThat(query("SELECT * FROM " + testTable.getName() + " WHERE short_decimal <= 124")).matches("VALUES (CAST(123.321 AS decimal(9,3)), CAST(123456789.987654321 AS decimal(30, 10)))").isFullyPushedDown();
        // NOTE(review): this assertion is identical to the one above; possibly a different predicate was intended
        assertThat(query("SELECT * FROM " + testTable.getName() + " WHERE short_decimal <= 124")).matches("VALUES (CAST(123.321 AS decimal(9,3)), CAST(123456789.987654321 AS decimal(30, 10)))").isFullyPushedDown();
        assertThat(query("SELECT * FROM " + testTable.getName() + " WHERE long_decimal <= 123456790")).matches("VALUES (CAST(123.321 AS decimal(9,3)), CAST(123456789.987654321 AS decimal(30, 10)))").isFullyPushedDown();
        assertThat(query("SELECT * FROM " + testTable.getName() + " WHERE short_decimal <= 123.321")).matches("VALUES (CAST(123.321 AS decimal(9,3)), CAST(123456789.987654321 AS decimal(30, 10)))").isFullyPushedDown();
        assertThat(query("SELECT * FROM " + testTable.getName() + " WHERE long_decimal <= 123456789.987654321")).matches("VALUES (CAST(123.321 AS decimal(9,3)), CAST(123456789.987654321 AS decimal(30, 10)))").isFullyPushedDown();
        assertThat(query("SELECT * FROM " + testTable.getName() + " WHERE short_decimal = 123.321")).matches("VALUES (CAST(123.321 AS decimal(9,3)), CAST(123456789.987654321 AS decimal(30, 10)))").isFullyPushedDown();
        assertThat(query("SELECT * FROM " + testTable.getName() + " WHERE long_decimal = 123456789.987654321")).matches("VALUES (CAST(123.321 AS decimal(9,3)), CAST(123456789.987654321 AS decimal(30, 10)))").isFullyPushedDown();
    }
}
Also used : SkipException(org.testng.SkipException) DataProvider(org.testng.annotations.DataProvider) IntStream.range(java.util.stream.IntStream.range) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) TestTable(io.trino.testing.sql.TestTable) MoreCollectors.onlyElement(com.google.common.collect.MoreCollectors.onlyElement) FilterNode(io.trino.sql.planner.plan.FilterNode) NONE(io.trino.plugin.sqlserver.DataCompression.NONE) ImmutableList(com.google.common.collect.ImmutableList) ROW(io.trino.plugin.sqlserver.DataCompression.ROW) TestingConnectorBehavior(io.trino.testing.TestingConnectorBehavior) ColumnHandle(io.trino.spi.connector.ColumnHandle) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) TestTable.randomTableSuffix(io.trino.testing.sql.TestTable.randomTableSuffix) Flaky(io.trino.testng.services.Flaky) ImmutableMap(com.google.common.collect.ImmutableMap) JdbcColumnHandle(io.trino.plugin.jdbc.JdbcColumnHandle) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) String.format(java.lang.String.format) Collectors.joining(java.util.stream.Collectors.joining) BaseJdbcConnectorTest(io.trino.plugin.jdbc.BaseJdbcConnectorTest) PlanMatchPattern.node(io.trino.sql.planner.assertions.PlanMatchPattern.node) List(java.util.List) PAGE(io.trino.plugin.sqlserver.DataCompression.PAGE) Optional(java.util.Optional) Assert.assertTrue(org.testng.Assert.assertTrue) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) JdbcTableHandle(io.trino.plugin.jdbc.JdbcTableHandle) PlanMatchPattern.tableScan(io.trino.sql.planner.assertions.PlanMatchPattern.tableScan) Session(io.trino.Session) ColumnHandle(io.trino.spi.connector.ColumnHandle) JdbcColumnHandle(io.trino.plugin.jdbc.JdbcColumnHandle) FilterNode(io.trino.sql.planner.plan.FilterNode) JdbcColumnHandle(io.trino.plugin.jdbc.JdbcColumnHandle) 
TestTable(io.trino.testing.sql.TestTable) JdbcTableHandle(io.trino.plugin.jdbc.JdbcTableHandle) Test(org.testng.annotations.Test) BaseJdbcConnectorTest(io.trino.plugin.jdbc.BaseJdbcConnectorTest)

Example 55 with Domain

use of io.trino.spi.predicate.Domain in project trino by trinodb.

the class IcebergPageSourceProvider method getParquetTupleDomain.

/**
 * Re-keys the effective predicate from Iceberg column handles to Parquet column descriptors.
 *
 * <p>Columns whose base type is map, array or row are skipped, as are columns with no descriptor
 * in {@code descriptorsByPath} under the single-element path made of the column name.
 *
 * @param descriptorsByPath Parquet descriptors keyed by column path
 * @param effectivePredicate predicate expressed on Iceberg column handles
 * @return {@link TupleDomain#none()} when the predicate is none, otherwise the domains of the matched columns
 */
private static TupleDomain<ColumnDescriptor> getParquetTupleDomain(Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<IcebergColumnHandle> effectivePredicate) {
    if (effectivePredicate.isNone()) {
        return TupleDomain.none();
    }
    ImmutableMap.Builder<ColumnDescriptor, Domain> parquetDomains = ImmutableMap.builder();
    for (Map.Entry<IcebergColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
        IcebergColumnHandle column = entry.getKey();
        String baseType = column.getType().getTypeSignature().getBase();
        // skip looking up predicates for complex types as Parquet only stores stats for primitives
        boolean complexType = baseType.equals(StandardTypes.MAP) || baseType.equals(StandardTypes.ARRAY) || baseType.equals(StandardTypes.ROW);
        if (complexType) {
            continue;
        }
        RichColumnDescriptor descriptor = descriptorsByPath.get(ImmutableList.of(column.getName()));
        if (descriptor == null) {
            // no descriptor is registered for this column's path, so its domain is dropped
            continue;
        }
        parquetDomains.put(descriptor, entry.getValue());
    }
    return TupleDomain.withColumnDomains(parquetDomains.buildOrThrow());
}
Also used : RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) RichColumnDescriptor(io.trino.parquet.RichColumnDescriptor) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap)

Aggregations

Domain (io.trino.spi.predicate.Domain)120 TupleDomain (io.trino.spi.predicate.TupleDomain)107 ColumnHandle (io.trino.spi.connector.ColumnHandle)51 Test (org.testng.annotations.Test)45 Map (java.util.Map)38 ImmutableList (com.google.common.collect.ImmutableList)36 ImmutableMap (com.google.common.collect.ImmutableMap)33 List (java.util.List)27 Optional (java.util.Optional)25 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)23 Type (io.trino.spi.type.Type)23 ConnectorSession (io.trino.spi.connector.ConnectorSession)21 SchemaTableName (io.trino.spi.connector.SchemaTableName)21 Objects.requireNonNull (java.util.Objects.requireNonNull)21 Set (java.util.Set)20 Range (io.trino.spi.predicate.Range)19 ValueSet (io.trino.spi.predicate.ValueSet)18 Constraint (io.trino.spi.connector.Constraint)17 String.format (java.lang.String.format)17 ImmutableSet (com.google.common.collect.ImmutableSet)16