Search in sources :

Example 1 with ValueSet

use of io.trino.spi.predicate.ValueSet in project trino by trinodb.

the class PushPredicateThroughProjectIntoWindow method extractUpperBound.

private static OptionalInt extractUpperBound(TupleDomain<Symbol> tupleDomain, Symbol symbol) {
    if (tupleDomain.isNone()) {
        return OptionalInt.empty();
    }
    Domain rankingDomain = tupleDomain.getDomains().get().get(symbol);
    if (rankingDomain == null) {
        return OptionalInt.empty();
    }
    ValueSet values = rankingDomain.getValues();
    if (values.isAll() || values.isNone() || values.getRanges().getRangeCount() <= 0) {
        return OptionalInt.empty();
    }
    Range span = values.getRanges().getSpan();
    if (span.isHighUnbounded()) {
        return OptionalInt.empty();
    }
    long upperBound = (Long) span.getHighBoundedValue();
    if (!span.isHighInclusive()) {
        upperBound--;
    }
    if (upperBound >= Integer.MIN_VALUE && upperBound <= Integer.MAX_VALUE) {
        return OptionalInt.of(toIntExact(upperBound));
    }
    return OptionalInt.empty();
}
Also used : Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) Range(io.trino.spi.predicate.Range) ValueSet(io.trino.spi.predicate.ValueSet)

Example 2 with ValueSet

use of io.trino.spi.predicate.ValueSet in project trino by trinodb.

the class HiveBucketing method getHiveBucketFilter.

public static Optional<HiveBucketFilter> getHiveBucketFilter(HiveTableHandle hiveTable, TupleDomain<ColumnHandle> effectivePredicate) {
    if (hiveTable.getBucketHandle().isEmpty()) {
        return Optional.empty();
    }
    HiveBucketProperty hiveBucketProperty = hiveTable.getBucketHandle().get().toTableBucketProperty();
    List<Column> dataColumns = hiveTable.getDataColumns().stream().map(HiveColumnHandle::toMetastoreColumn).collect(toImmutableList());
    Optional<Map<ColumnHandle, List<NullableValue>>> bindings = TupleDomain.extractDiscreteValues(effectivePredicate);
    if (bindings.isEmpty()) {
        return Optional.empty();
    }
    Optional<Set<Integer>> buckets = getHiveBuckets(hiveBucketProperty, dataColumns, bindings.get());
    if (buckets.isPresent()) {
        return Optional.of(new HiveBucketFilter(buckets.get()));
    }
    Optional<Domain> domain = effectivePredicate.getDomains().flatMap(domains -> domains.entrySet().stream().filter(entry -> ((HiveColumnHandle) entry.getKey()).getName().equals(BUCKET_COLUMN_NAME)).findFirst().map(Entry::getValue));
    if (domain.isEmpty()) {
        return Optional.empty();
    }
    ValueSet values = domain.get().getValues();
    ImmutableSet.Builder<Integer> builder = ImmutableSet.builder();
    int bucketCount = hiveBucketProperty.getBucketCount();
    for (int i = 0; i < bucketCount; i++) {
        if (values.containsValue((long) i)) {
            builder.add(i);
        }
    }
    return Optional.of(new HiveBucketFilter(builder.build()));
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) BUCKETING_V2(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V2) Lists.cartesianProduct(com.google.common.collect.Lists.cartesianProduct) BUCKETING_V1(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) ImmutableSet(com.google.common.collect.ImmutableSet) Table(io.trino.plugin.hive.metastore.Table) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) HiveTimestampPrecision(io.trino.plugin.hive.HiveTimestampPrecision) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) Objects(java.util.Objects) List(java.util.List) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) StandardErrorCode(io.trino.spi.StandardErrorCode) Entry(java.util.Map.Entry) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) NullableValue(io.trino.spi.predicate.NullableValue) Page(io.trino.spi.Page) HashMap(java.util.HashMap) HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty) HashSet(java.util.HashSet) HiveType(io.trino.plugin.hive.HiveType) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList(com.google.common.collect.ImmutableList) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ColumnHandle(io.trino.spi.connector.ColumnHandle) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TABLE_BUCKETING_VERSION(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_BUCKETING_VERSION) HiveBucketHandle(io.trino.plugin.hive.HiveBucketHandle) HiveUtil.getRegularColumnHandles(io.trino.plugin.hive.util.HiveUtil.getRegularColumnHandles) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TypeManager(io.trino.spi.type.TypeManager) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) ValueSet(io.trino.spi.predicate.ValueSet) HashSet(java.util.HashSet) NullableValue(io.trino.spi.predicate.NullableValue) HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty) ImmutableSet(com.google.common.collect.ImmutableSet) Column(io.trino.plugin.hive.metastore.Column) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) Map(java.util.Map) HashMap(java.util.HashMap) ValueSet(io.trino.spi.predicate.ValueSet)

Example 3 with ValueSet

use of io.trino.spi.predicate.ValueSet in project trino by trinodb.

the class KafkaFilterManager method filterRangeByDomain.

@VisibleForTesting
public static Optional<Range> filterRangeByDomain(Domain domain) {
    Long low = INVALID_KAFKA_RANGE_INDEX;
    Long high = INVALID_KAFKA_RANGE_INDEX;
    if (domain.isSingleValue()) {
        // still return range for single value case like (_partition_offset=XXX or _timestamp=XXX)
        low = (long) domain.getSingleValue();
        high = (long) domain.getSingleValue();
    } else {
        ValueSet valueSet = domain.getValues();
        if (valueSet instanceof SortedRangeSet) {
            // still return range for single value case like (_partition_offset in (XXX1,XXX2) or _timestamp in XXX1, XXX2)
            Ranges ranges = ((SortedRangeSet) valueSet).getRanges();
            List<io.trino.spi.predicate.Range> rangeList = ranges.getOrderedRanges();
            if (rangeList.stream().allMatch(io.trino.spi.predicate.Range::isSingleValue)) {
                List<Long> values = rangeList.stream().map(range -> (Long) range.getSingleValue()).collect(toImmutableList());
                low = Collections.min(values);
                high = Collections.max(values);
            } else {
                io.trino.spi.predicate.Range span = ranges.getSpan();
                low = getLowIncludedValue(span).orElse(low);
                high = getHighIncludedValue(span).orElse(high);
            }
        }
    }
    if (high != INVALID_KAFKA_RANGE_INDEX) {
        high = high + 1;
    }
    return Optional.of(new Range(low, high));
}
Also used : Config(org.apache.kafka.clients.admin.Config) DescribeConfigsResult(org.apache.kafka.clients.admin.DescribeConfigsResult) MICROSECONDS_PER_MILLISECOND(io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND) Type(io.trino.spi.type.Type) OFFSET_TIMESTAMP_FIELD(io.trino.plugin.kafka.KafkaInternalFieldManager.OFFSET_TIMESTAMP_FIELD) Function(java.util.function.Function) PARTITION_ID_FIELD(io.trino.plugin.kafka.KafkaInternalFieldManager.PARTITION_ID_FIELD) TimestampType(io.trino.spi.type.TimestampType) Inject(javax.inject.Inject) ConfigResource(org.apache.kafka.common.config.ConfigResource) Verify.verify(com.google.common.base.Verify.verify) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) Admin(org.apache.kafka.clients.admin.Admin) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) KAFKA_SPLIT_ERROR(io.trino.plugin.kafka.KafkaErrorCode.KAFKA_SPLIT_ERROR) Math.floorDiv(java.lang.Math.floorDiv) TopicPartition(org.apache.kafka.common.TopicPartition) PARTITION_OFFSET_FIELD(io.trino.plugin.kafka.KafkaInternalFieldManager.PARTITION_OFFSET_FIELD) ImmutableMap(com.google.common.collect.ImmutableMap) Ranges(io.trino.spi.predicate.Ranges) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) ConnectorSession(io.trino.spi.connector.ConnectorSession) PartitionInfo(org.apache.kafka.common.PartitionInfo) TupleDomain(io.trino.spi.predicate.TupleDomain) OffsetAndTimestamp(org.apache.kafka.clients.consumer.OffsetAndTimestamp) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) SortedRangeSet(io.trino.spi.predicate.SortedRangeSet) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) Ranges(io.trino.spi.predicate.Ranges) SortedRangeSet(io.trino.spi.predicate.SortedRangeSet) ValueSet(io.trino.spi.predicate.ValueSet) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 4 with ValueSet

use of io.trino.spi.predicate.ValueSet in project trino by trinodb.

the class TestPrometheusSplit method testPredicatePushDownSetsUpperBoundOnly.

@Test
public void testPredicatePushDownSetsUpperBoundOnly() {
    long predicateHighValue = 1568638171999L;
    Range highRange = Range.lessThanOrEqual(TIMESTAMP_COLUMN_TYPE, packDateTimeWithZone(predicateHighValue, UTC_KEY));
    ValueSet valueSet = ValueSet.ofRanges(highRange);
    Domain testDomain = Domain.create(valueSet, false);
    TupleDomain<ColumnHandle> testTupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(new PrometheusColumnHandle("timestamp", TIMESTAMP_COLUMN_TYPE, 2), testDomain));
    PrometheusTableHandle prometheusTableHandle = new PrometheusTableHandle("schemaName", "tableName").withPredicate(testTupleDomain);
    io.airlift.units.Duration maxQueryRangeDuration = new io.airlift.units.Duration(120, TimeUnit.SECONDS);
    io.airlift.units.Duration queryChunkSizeDuration = new io.airlift.units.Duration(30, TimeUnit.SECONDS);
    Instant now = ofEpochMilli(1568638171999L + 600000L);
    List<String> splitTimes = PrometheusSplitManager.generateTimesForSplits(now, maxQueryRangeDuration, queryChunkSizeDuration, prometheusTableHandle);
    TemporalAmount expectedMaxQueryAsTime = java.time.Duration.ofMillis(maxQueryRangeDuration.toMillis() + ((splitTimes.size() - 1) * OFFSET_MILLIS));
    String lastSplit = splitTimes.get(splitTimes.size() - 1);
    Instant lastSplitAsTime = ofEpochMilli(longFromDecimalSecondString(lastSplit));
    String earliestSplit = splitTimes.get(0);
    Instant earliestSplitAsTime = ofEpochMilli(longFromDecimalSecondString(earliestSplit));
    TemporalAmount queryChunkAsTime = java.time.Duration.ofMillis(queryChunkSizeDuration.toMillis());
    java.time.Duration actualMaxDuration = Duration.between(earliestSplitAsTime.minus(queryChunkAsTime), lastSplitAsTime);
    assertEquals(lastSplitAsTime.toEpochMilli(), 1568638171999L);
    assertEquals(actualMaxDuration, expectedMaxQueryAsTime);
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) Instant(java.time.Instant) Duration(java.time.Duration) PrometheusSplitManager.decimalSecondString(io.trino.plugin.prometheus.PrometheusSplitManager.decimalSecondString) Range(io.trino.spi.predicate.Range) TemporalAmount(java.time.temporal.TemporalAmount) Duration(java.time.Duration) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) ValueSet(io.trino.spi.predicate.ValueSet) Test(org.testng.annotations.Test)

Example 5 with ValueSet

use of io.trino.spi.predicate.ValueSet in project trino by trinodb.

the class KuduClientSession method addConstraintPredicates.

/**
 * translates TupleDomain to KuduPredicates.
 */
private void addConstraintPredicates(KuduTable table, KuduScanToken.KuduScanTokenBuilder builder, TupleDomain<ColumnHandle> constraintSummary) {
    verify(!constraintSummary.isNone(), "constraintSummary is none");
    if (constraintSummary.isAll()) {
        return;
    }
    Schema schema = table.getSchema();
    for (TupleDomain.ColumnDomain<ColumnHandle> columnDomain : constraintSummary.getColumnDomains().get()) {
        int position = ((KuduColumnHandle) columnDomain.getColumn()).getOrdinalPosition();
        ColumnSchema columnSchema = schema.getColumnByIndex(position);
        Domain domain = columnDomain.getDomain();
        verify(!domain.isNone(), "Domain is none");
        if (domain.isAll()) {
        // no restriction
        } else if (domain.isOnlyNull()) {
            builder.addPredicate(KuduPredicate.newIsNullPredicate(columnSchema));
        } else if (!domain.getValues().isNone() && domain.isNullAllowed()) {
        // no restriction
        } else if (domain.getValues().isAll() && !domain.isNullAllowed()) {
            builder.addPredicate(KuduPredicate.newIsNotNullPredicate(columnSchema));
        } else if (domain.isSingleValue()) {
            KuduPredicate predicate = createEqualsPredicate(columnSchema, domain.getSingleValue());
            builder.addPredicate(predicate);
        } else {
            ValueSet valueSet = domain.getValues();
            if (valueSet instanceof EquatableValueSet) {
                DiscreteValues discreteValues = valueSet.getDiscreteValues();
                KuduPredicate predicate = createInListPredicate(columnSchema, discreteValues);
                builder.addPredicate(predicate);
            } else if (valueSet instanceof SortedRangeSet) {
                Ranges ranges = ((SortedRangeSet) valueSet).getRanges();
                List<Range> rangeList = ranges.getOrderedRanges();
                if (rangeList.stream().allMatch(Range::isSingleValue)) {
                    io.trino.spi.type.Type type = TypeHelper.fromKuduColumn(columnSchema);
                    List<Object> javaValues = rangeList.stream().map(range -> TypeHelper.getJavaValue(type, range.getSingleValue())).collect(toImmutableList());
                    KuduPredicate predicate = KuduPredicate.newInListPredicate(columnSchema, javaValues);
                    builder.addPredicate(predicate);
                } else {
                    Range span = ranges.getSpan();
                    if (!span.isLowUnbounded()) {
                        KuduPredicate.ComparisonOp op = span.isLowInclusive() ? GREATER_EQUAL : GREATER;
                        KuduPredicate predicate = createComparisonPredicate(columnSchema, op, span.getLowBoundedValue());
                        builder.addPredicate(predicate);
                    }
                    if (!span.isHighUnbounded()) {
                        KuduPredicate.ComparisonOp op = span.isHighInclusive() ? LESS_EQUAL : LESS;
                        KuduPredicate predicate = createComparisonPredicate(columnSchema, op, span.getHighBoundedValue());
                        builder.addPredicate(predicate);
                    }
                }
            } else {
                throw new IllegalStateException("Unexpected domain: " + domain);
            }
        }
    }
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) Ranges(io.trino.spi.predicate.Ranges) Schema(org.apache.kudu.Schema) ColumnSchema(org.apache.kudu.ColumnSchema) HashBucketSchema(org.apache.kudu.client.PartitionSchema.HashBucketSchema) EquatableValueSet(io.trino.spi.predicate.EquatableValueSet) ColumnSchema(org.apache.kudu.ColumnSchema) Range(io.trino.spi.predicate.Range) KuduPredicate(org.apache.kudu.client.KuduPredicate) Type(org.apache.kudu.Type) DecimalType(io.trino.spi.type.DecimalType) TupleDomain(io.trino.spi.predicate.TupleDomain) SortedRangeSet(io.trino.spi.predicate.SortedRangeSet) DiscreteValues(io.trino.spi.predicate.DiscreteValues) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) EquatableValueSet(io.trino.spi.predicate.EquatableValueSet) ValueSet(io.trino.spi.predicate.ValueSet)

Aggregations

ValueSet (io.trino.spi.predicate.ValueSet)15 Domain (io.trino.spi.predicate.Domain)13 TupleDomain (io.trino.spi.predicate.TupleDomain)13 Range (io.trino.spi.predicate.Range)11 ColumnHandle (io.trino.spi.connector.ColumnHandle)8 VisibleForTesting (com.google.common.annotations.VisibleForTesting)4 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)4 List (java.util.List)4 TrinoException (io.trino.spi.TrinoException)3 ConnectorSession (io.trino.spi.connector.ConnectorSession)3 Ranges (io.trino.spi.predicate.Ranges)3 SortedRangeSet (io.trino.spi.predicate.SortedRangeSet)3 String.format (java.lang.String.format)3 Instant (java.time.Instant)3 Map (java.util.Map)3 Optional (java.util.Optional)3 Set (java.util.Set)3 Test (org.testng.annotations.Test)3 Verify.verify (com.google.common.base.Verify.verify)2 ImmutableList (com.google.common.collect.ImmutableList)2