Search in sources :

Example 1 with SortedRangeSet

use of io.trino.spi.predicate.SortedRangeSet in project trino by trinodb.

the class KafkaFilterManager method filterRangeByDomain.

/**
 * Derives a {@link Range} from the values admitted by the given domain.
 *
 * <p>The lower bound is the smallest included value and the upper bound is one past the
 * largest included value. A bound the domain does not constrain (or a value set that is not
 * a {@link SortedRangeSet}) is left as {@code INVALID_KAFKA_RANGE_INDEX}.
 *
 * @param domain the domain to inspect; its values are read as {@code long}
 * @return an always-present {@link Optional} holding the derived range
 */
@VisibleForTesting
public static Optional<Range> filterRangeByDomain(Domain domain) {
    // Primitive longs avoid needless boxing and guarantee that the sentinel check
    // below is a numeric comparison rather than a reference comparison.
    long low = INVALID_KAFKA_RANGE_INDEX;
    long high = INVALID_KAFKA_RANGE_INDEX;
    if (domain.isSingleValue()) {
        // a single value (e.g. _partition_offset = X or _timestamp = X) still yields a range
        long value = (long) domain.getSingleValue();
        low = value;
        high = value;
    } else {
        ValueSet valueSet = domain.getValues();
        if (valueSet instanceof SortedRangeSet) {
            Ranges ranges = ((SortedRangeSet) valueSet).getRanges();
            List<io.trino.spi.predicate.Range> rangeList = ranges.getOrderedRanges();
            if (rangeList.stream().allMatch(io.trino.spi.predicate.Range::isSingleValue)) {
                // a list of discrete values (e.g. _partition_offset IN (X1, X2)) is widened to [min, max]
                List<Long> values = rangeList.stream().map(range -> (Long) range.getSingleValue()).collect(toImmutableList());
                low = Collections.min(values);
                high = Collections.max(values);
            } else {
                // general case: use the span of all ranges; an absent bound keeps the sentinel
                io.trino.spi.predicate.Range span = ranges.getSpan();
                low = getLowIncludedValue(span).orElse(low);
                high = getHighIncludedValue(span).orElse(high);
            }
        }
    }
    // The included upper value is advanced by one (presumably the range end is exclusive -
    // confirm against Range). At Long.MAX_VALUE the increment would wrap to Long.MIN_VALUE,
    // so the bound is left saturated there instead.
    if (high != INVALID_KAFKA_RANGE_INDEX && high != Long.MAX_VALUE) {
        high = high + 1;
    }
    return Optional.of(new Range(low, high));
}
Also used : Config(org.apache.kafka.clients.admin.Config) DescribeConfigsResult(org.apache.kafka.clients.admin.DescribeConfigsResult) MICROSECONDS_PER_MILLISECOND(io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND) Type(io.trino.spi.type.Type) OFFSET_TIMESTAMP_FIELD(io.trino.plugin.kafka.KafkaInternalFieldManager.OFFSET_TIMESTAMP_FIELD) Function(java.util.function.Function) PARTITION_ID_FIELD(io.trino.plugin.kafka.KafkaInternalFieldManager.PARTITION_ID_FIELD) TimestampType(io.trino.spi.type.TimestampType) Inject(javax.inject.Inject) ConfigResource(org.apache.kafka.common.config.ConfigResource) Verify.verify(com.google.common.base.Verify.verify) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) Admin(org.apache.kafka.clients.admin.Admin) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) KAFKA_SPLIT_ERROR(io.trino.plugin.kafka.KafkaErrorCode.KAFKA_SPLIT_ERROR) Math.floorDiv(java.lang.Math.floorDiv) TopicPartition(org.apache.kafka.common.TopicPartition) PARTITION_OFFSET_FIELD(io.trino.plugin.kafka.KafkaInternalFieldManager.PARTITION_OFFSET_FIELD) ImmutableMap(com.google.common.collect.ImmutableMap) Ranges(io.trino.spi.predicate.Ranges) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) ConnectorSession(io.trino.spi.connector.ConnectorSession) PartitionInfo(org.apache.kafka.common.PartitionInfo) TupleDomain(io.trino.spi.predicate.TupleDomain) OffsetAndTimestamp(org.apache.kafka.clients.consumer.OffsetAndTimestamp) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) 
VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) SortedRangeSet(io.trino.spi.predicate.SortedRangeSet) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) Ranges(io.trino.spi.predicate.Ranges) SortedRangeSet(io.trino.spi.predicate.SortedRangeSet) ValueSet(io.trino.spi.predicate.ValueSet) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 2 with SortedRangeSet

use of io.trino.spi.predicate.SortedRangeSet in project trino by trinodb.

the class KuduClientSession method addConstraintPredicates.

/**
 * Translates a TupleDomain into KuduPredicates and registers them on the scan token builder.
 *
 * <p>Per column domain: an unrestricted domain, or one that allows null alongside some
 * values, adds no predicate; null-only and not-null domains map to the matching Kudu
 * null predicates; a single value becomes an equality predicate; discrete value sets
 * become an IN-list; any other sorted range set is reduced to comparison predicates on
 * the bounds of its span.
 *
 * @param table the Kudu table whose schema supplies the column definitions
 * @param builder the scan token builder receiving the predicates
 * @param constraintSummary the constraint to translate; must not be none
 * @throws IllegalStateException if a value set is neither equatable nor a sorted range set
 */
private void addConstraintPredicates(KuduTable table, KuduScanToken.KuduScanTokenBuilder builder, TupleDomain<ColumnHandle> constraintSummary) {
    verify(!constraintSummary.isNone(), "constraintSummary is none");
    if (constraintSummary.isAll()) {
        return;
    }
    Schema schema = table.getSchema();
    for (TupleDomain.ColumnDomain<ColumnHandle> columnDomain : constraintSummary.getColumnDomains().get()) {
        KuduColumnHandle columnHandle = (KuduColumnHandle) columnDomain.getColumn();
        ColumnSchema columnSchema = schema.getColumnByIndex(columnHandle.getOrdinalPosition());
        Domain domain = columnDomain.getDomain();
        verify(!domain.isNone(), "Domain is none");

        if (domain.isAll()) {
            // no restriction
            continue;
        }
        if (domain.isOnlyNull()) {
            builder.addPredicate(KuduPredicate.newIsNullPredicate(columnSchema));
            continue;
        }
        if (!domain.getValues().isNone() && domain.isNullAllowed()) {
            // "values OR null" adds no predicate here, so no restriction
            continue;
        }
        if (domain.getValues().isAll() && !domain.isNullAllowed()) {
            builder.addPredicate(KuduPredicate.newIsNotNullPredicate(columnSchema));
            continue;
        }
        if (domain.isSingleValue()) {
            builder.addPredicate(createEqualsPredicate(columnSchema, domain.getSingleValue()));
            continue;
        }

        ValueSet valueSet = domain.getValues();
        if (valueSet instanceof EquatableValueSet) {
            DiscreteValues discreteValues = valueSet.getDiscreteValues();
            builder.addPredicate(createInListPredicate(columnSchema, discreteValues));
            continue;
        }
        if (!(valueSet instanceof SortedRangeSet)) {
            throw new IllegalStateException("Unexpected domain: " + domain);
        }

        Ranges ranges = ((SortedRangeSet) valueSet).getRanges();
        List<Range> orderedRanges = ranges.getOrderedRanges();
        boolean onlyDiscretePoints = orderedRanges.stream().allMatch(Range::isSingleValue);
        if (onlyDiscretePoints) {
            // every range is a single point, so the set is expressed as an IN-list
            io.trino.spi.type.Type type = TypeHelper.fromKuduColumn(columnSchema);
            List<Object> javaValues = orderedRanges.stream()
                    .map(range -> TypeHelper.getJavaValue(type, range.getSingleValue()))
                    .collect(toImmutableList());
            builder.addPredicate(KuduPredicate.newInListPredicate(columnSchema, javaValues));
            continue;
        }

        // otherwise only the bounds of the overall span are pushed down
        Range span = ranges.getSpan();
        if (!span.isLowUnbounded()) {
            KuduPredicate.ComparisonOp lowerOp = span.isLowInclusive() ? GREATER_EQUAL : GREATER;
            builder.addPredicate(createComparisonPredicate(columnSchema, lowerOp, span.getLowBoundedValue()));
        }
        if (!span.isHighUnbounded()) {
            KuduPredicate.ComparisonOp upperOp = span.isHighInclusive() ? LESS_EQUAL : LESS;
            builder.addPredicate(createComparisonPredicate(columnSchema, upperOp, span.getHighBoundedValue()));
        }
    }
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) Ranges(io.trino.spi.predicate.Ranges) Schema(org.apache.kudu.Schema) ColumnSchema(org.apache.kudu.ColumnSchema) HashBucketSchema(org.apache.kudu.client.PartitionSchema.HashBucketSchema) EquatableValueSet(io.trino.spi.predicate.EquatableValueSet) ColumnSchema(org.apache.kudu.ColumnSchema) Range(io.trino.spi.predicate.Range) KuduPredicate(org.apache.kudu.client.KuduPredicate) Type(org.apache.kudu.Type) DecimalType(io.trino.spi.type.DecimalType) TupleDomain(io.trino.spi.predicate.TupleDomain) SortedRangeSet(io.trino.spi.predicate.SortedRangeSet) DiscreteValues(io.trino.spi.predicate.DiscreteValues) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) EquatableValueSet(io.trino.spi.predicate.EquatableValueSet) ValueSet(io.trino.spi.predicate.ValueSet)

Example 3 with SortedRangeSet

use of io.trino.spi.predicate.SortedRangeSet in project trino by trinodb.

the class DomainTranslator method extractDisjuncts.

/**
 * Builds the list of disjunct expressions that together describe the given ranges.
 *
 * <p>Ranges separated only by excluded single points are coalesced and emitted via
 * {@code combineRangeWithExcludedPoints}; remaining ranges go through {@code processRange};
 * single values are collected and appended last as one equality or one IN predicate.
 *
 * @param session the session passed through to the expression builders
 * @param type the type of the values in the ranges
 * @param ranges the ranges to translate
 * @param reference the symbol the produced expressions refer to
 * @return a mutable list of disjunct expressions
 */
private List<Expression> extractDisjuncts(Session session, Type type, Ranges ranges, SymbolReference reference) {
    List<Expression> result = new ArrayList<>();
    List<Expression> pointLiterals = new ArrayList<>();
    List<Range> inputRanges = ranges.getOrderedRanges();
    SortedRangeSet inputSet = SortedRangeSet.copyOf(type, inputRanges);
    // single points missing from the input, i.e. the "holes" between otherwise adjacent ranges
    List<Range> excludedPoints = inputSet.complement().getOrderedRanges().stream()
            .filter(Range::isSingleValue)
            .collect(toList());
    // the input with those holes filled in, which coalesces ranges split only by a point
    List<Range> coalescedRanges = SortedRangeSet.copyOf(type, excludedPoints).union(inputSet).getOrderedRanges();
    PeekingIterator<Range> pendingExclusions = peekingIterator(excludedPoints.iterator());

    /*
        For types including NaN, it is incorrect to introduce range "all" while processing a set of ranges,
        even if the component ranges cover the entire value set.
        This is because partial ranges don't include NaN, while range "all" does.
        Example: ranges (unbounded , 1.0) and (1.0, unbounded) should not be coalesced to (unbounded, unbounded) with excluded point 1.0.
        That result would be further translated to expression "xxx <> 1.0", which is satisfied by NaN.
        To avoid error, in such case the ranges are not optimised.
         */
    boolean mayContainNaN = type instanceof RealType || type instanceof DoubleType;
    if (mayContainNaN
            && inputRanges.stream().noneMatch(Range::isAll)
            && coalescedRanges.stream().anyMatch(Range::isAll)) {
        for (Range inputRange : inputRanges) {
            result.add(processRange(session, type, inputRange, reference));
        }
        return result;
    }

    for (Range candidate : coalescedRanges) {
        if (candidate.isSingleValue()) {
            pointLiterals.add(literalEncoder.toExpression(session, candidate.getSingleValue(), type));
        } else {
            // consume the excluded points lying inside this coalesced range
            List<Expression> excludedLiterals = new ArrayList<>();
            while (pendingExclusions.hasNext() && candidate.contains(pendingExclusions.peek())) {
                Range excluded = pendingExclusions.next();
                excludedLiterals.add(literalEncoder.toExpression(session, excluded.getSingleValue(), type));
            }
            if (excludedLiterals.isEmpty()) {
                result.add(processRange(session, type, candidate, reference));
            } else {
                result.add(combineRangeWithExcludedPoints(session, type, reference, candidate, excludedLiterals));
            }
        }
    }

    // Re-attach the single values: one value becomes an equality, several become an IN predicate
    if (pointLiterals.size() > 1) {
        result.add(new InPredicate(reference, new InListExpression(pointLiterals)));
    } else if (pointLiterals.size() == 1) {
        result.add(new ComparisonExpression(EQUAL, reference, getOnlyElement(pointLiterals)));
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) InListExpression(io.trino.sql.tree.InListExpression) Range(io.trino.spi.predicate.Range) InPredicate(io.trino.sql.tree.InPredicate) RealType(io.trino.spi.type.RealType) ComparisonExpression(io.trino.sql.tree.ComparisonExpression) SortedRangeSet(io.trino.spi.predicate.SortedRangeSet) ComparisonExpression(io.trino.sql.tree.ComparisonExpression) Expression(io.trino.sql.tree.Expression) InListExpression(io.trino.sql.tree.InListExpression) NotExpression(io.trino.sql.tree.NotExpression) LogicalExpression(io.trino.sql.tree.LogicalExpression) DoubleType(io.trino.spi.type.DoubleType)

Example 4 with SortedRangeSet

use of io.trino.spi.predicate.SortedRangeSet in project trino by trinodb.

the class KafkaFilterManager method filterValuesByDomain.

/**
 * Narrows {@code sourceValues} to the values admitted by the given domain.
 *
 * <p>A single-value domain keeps only that value; a sorted range set made only of single
 * points keeps the points present in {@code sourceValues}; any other sorted range set keeps
 * the source values inside the included bounds of its span. For every other kind of value
 * set the source values are returned unchanged.
 *
 * @param domain the domain to filter by; its values are read as {@code long}
 * @param sourceValues the candidate values; must not be null
 * @return the filtered values
 */
@VisibleForTesting
public static Set<Long> filterValuesByDomain(Domain domain, Set<Long> sourceValues) {
    requireNonNull(sourceValues, "sourceValues is none");
    if (domain.isSingleValue()) {
        long expected = (long) domain.getSingleValue();
        return sourceValues.stream()
                .filter(candidate -> candidate == expected)
                .collect(toImmutableSet());
    }

    ValueSet valueSet = domain.getValues();
    if (!(valueSet instanceof SortedRangeSet)) {
        // nothing to filter by for other kinds of value set
        return sourceValues;
    }

    Ranges ranges = ((SortedRangeSet) valueSet).getRanges();
    List<io.trino.spi.predicate.Range> orderedRanges = ranges.getOrderedRanges();
    boolean onlyDiscretePoints = orderedRanges.stream().allMatch(io.trino.spi.predicate.Range::isSingleValue);
    if (onlyDiscretePoints) {
        return orderedRanges.stream()
                .map(range -> (Long) range.getSingleValue())
                .filter(sourceValues::contains)
                .collect(toImmutableSet());
    }

    // range predicates such as (_partition_id > 1) are evaluated against the overall span
    io.trino.spi.predicate.Range span = ranges.getSpan();
    long lowerBound = getLowIncludedValue(span).orElse(0L);
    long upperBound = getHighIncludedValue(span).orElse(Long.MAX_VALUE);
    return sourceValues.stream()
            .filter(candidate -> lowerBound <= candidate && candidate <= upperBound)
            .collect(toImmutableSet());
}
Also used : Config(org.apache.kafka.clients.admin.Config) DescribeConfigsResult(org.apache.kafka.clients.admin.DescribeConfigsResult) MICROSECONDS_PER_MILLISECOND(io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND) Type(io.trino.spi.type.Type) OFFSET_TIMESTAMP_FIELD(io.trino.plugin.kafka.KafkaInternalFieldManager.OFFSET_TIMESTAMP_FIELD) Function(java.util.function.Function) PARTITION_ID_FIELD(io.trino.plugin.kafka.KafkaInternalFieldManager.PARTITION_ID_FIELD) TimestampType(io.trino.spi.type.TimestampType) Inject(javax.inject.Inject) ConfigResource(org.apache.kafka.common.config.ConfigResource) Verify.verify(com.google.common.base.Verify.verify) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ColumnHandle(io.trino.spi.connector.ColumnHandle) Admin(org.apache.kafka.clients.admin.Admin) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) KAFKA_SPLIT_ERROR(io.trino.plugin.kafka.KafkaErrorCode.KAFKA_SPLIT_ERROR) Math.floorDiv(java.lang.Math.floorDiv) TopicPartition(org.apache.kafka.common.TopicPartition) PARTITION_OFFSET_FIELD(io.trino.plugin.kafka.KafkaInternalFieldManager.PARTITION_OFFSET_FIELD) ImmutableMap(com.google.common.collect.ImmutableMap) Ranges(io.trino.spi.predicate.Ranges) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) ConnectorSession(io.trino.spi.connector.ConnectorSession) PartitionInfo(org.apache.kafka.common.PartitionInfo) TupleDomain(io.trino.spi.predicate.TupleDomain) OffsetAndTimestamp(org.apache.kafka.clients.consumer.OffsetAndTimestamp) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) 
VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) SortedRangeSet(io.trino.spi.predicate.SortedRangeSet) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer) Ranges(io.trino.spi.predicate.Ranges) SortedRangeSet(io.trino.spi.predicate.SortedRangeSet) ValueSet(io.trino.spi.predicate.ValueSet) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

SortedRangeSet (io.trino.spi.predicate.SortedRangeSet)4 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)3 ColumnHandle (io.trino.spi.connector.ColumnHandle)3 Domain (io.trino.spi.predicate.Domain)3 Ranges (io.trino.spi.predicate.Ranges)3 TupleDomain (io.trino.spi.predicate.TupleDomain)3 ValueSet (io.trino.spi.predicate.ValueSet)3 List (java.util.List)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 Verify.verify (com.google.common.base.Verify.verify)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)2 Iterables.getOnlyElement (com.google.common.collect.Iterables.getOnlyElement)2 KAFKA_SPLIT_ERROR (io.trino.plugin.kafka.KafkaErrorCode.KAFKA_SPLIT_ERROR)2 OFFSET_TIMESTAMP_FIELD (io.trino.plugin.kafka.KafkaInternalFieldManager.OFFSET_TIMESTAMP_FIELD)2 PARTITION_ID_FIELD (io.trino.plugin.kafka.KafkaInternalFieldManager.PARTITION_ID_FIELD)2 PARTITION_OFFSET_FIELD (io.trino.plugin.kafka.KafkaInternalFieldManager.PARTITION_OFFSET_FIELD)2 TrinoException (io.trino.spi.TrinoException)2 ConnectorSession (io.trino.spi.connector.ConnectorSession)2 Range (io.trino.spi.predicate.Range)2