Search in sources :

Example 41 with TupleDomain

use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.

the class MaterializedViewUtils method generateBaseTablePredicates.

/**
 * Builds one predicate expression per base table from the partition predicates recorded for it.
 *
 * <p>Each predicate disjunct of a table is turned into a conjunction with one term per partition
 * key ({@code key IS NULL} for a null value, {@code key = literal} otherwise); the conjunctions of
 * all disjuncts are then OR-ed together. A table for which no conjunction could be built is mapped
 * to {@code FALSE_LITERAL}.
 *
 * @param predicatesFromBaseTables partition predicates keyed by base table
 * @param metadata source of the block encoding serde used to encode literal values; must not be null
 * @return a mutable map from each base table to its predicate expression
 * @throws IllegalStateException if fixed values cannot be extracted from a disjunct, or a disjunct
 *         has no fixed value for one of the table's partition keys
 */
public static Map<SchemaTableName, Expression> generateBaseTablePredicates(Map<SchemaTableName, MaterializedViewStatus.MaterializedDataPredicates> predicatesFromBaseTables, Metadata metadata) {
    Map<SchemaTableName, Expression> baseTablePredicates = new HashMap<>();
    // The encoder depends only on metadata, so it is created once rather than once per partition value.
    LiteralEncoder literalEncoder = new LiteralEncoder(metadata.getBlockEncodingSerde());
    for (Map.Entry<SchemaTableName, MaterializedViewStatus.MaterializedDataPredicates> tableEntry : predicatesFromBaseTables.entrySet()) {
        SchemaTableName baseTable = tableEntry.getKey();
        MaterializedViewStatus.MaterializedDataPredicates predicatesInfo = tableEntry.getValue();
        List<String> partitionKeys = predicatesInfo.getColumnNames();
        // keyExpressions.get(i) is the identifier for partitionKeys.get(i).
        ImmutableList<Expression> keyExpressions = partitionKeys.stream().map(Identifier::new).collect(toImmutableList());
        List<TupleDomain<String>> predicateDisjuncts = predicatesInfo.getPredicateDisjuncts();
        Expression disjunct = null;
        for (TupleDomain<String> predicateDisjunct : predicateDisjuncts) {
            Map<String, NullableValue> predicateKeyValue = extractFixedValues(predicateDisjunct).orElseThrow(() -> new IllegalStateException("predicateKeyValue is not present!"));
            Expression conjunct = null;
            for (int i = 0; i < partitionKeys.size(); i++) {
                String key = partitionKeys.get(i);
                NullableValue nullableValue = predicateKeyValue.get(key);
                if (nullableValue == null) {
                    // Fail with context instead of a bare NullPointerException on the dereference below.
                    throw new IllegalStateException("No fixed value for partition key '" + key + "' of table " + baseTable);
                }
                Expression keyExpression = keyExpressions.get(i);
                Expression expression;
                if (nullableValue.isNull()) {
                    expression = new IsNullPredicate(keyExpression);
                } else {
                    Expression valueExpression = literalEncoder.toExpression(nullableValue.getValue(), nullableValue.getType(), false);
                    expression = new ComparisonExpression(EQUAL, keyExpression, valueExpression);
                }
                conjunct = conjunct == null ? expression : new LogicalBinaryExpression(AND, conjunct, expression);
            }
            // A disjunct with no partition keys contributes nothing.
            if (conjunct != null) {
                disjunct = disjunct == null ? conjunct : new LogicalBinaryExpression(OR, disjunct, conjunct);
            }
        }
        // If no (fresh) partitions are found for table, that means we should not select from it
        if (disjunct == null) {
            disjunct = FALSE_LITERAL;
        }
        baseTablePredicates.put(baseTable, disjunct);
    }
    return baseTablePredicates;
}
Also used : LogicalBinaryExpression(com.facebook.presto.sql.tree.LogicalBinaryExpression) HashMap(java.util.HashMap) NullableValue(com.facebook.presto.common.predicate.NullableValue) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ComparisonExpression(com.facebook.presto.sql.tree.ComparisonExpression) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ComparisonExpression(com.facebook.presto.sql.tree.ComparisonExpression) LogicalBinaryExpression(com.facebook.presto.sql.tree.LogicalBinaryExpression) Expression(com.facebook.presto.sql.tree.Expression) LiteralEncoder(com.facebook.presto.sql.planner.LiteralEncoder) IsNullPredicate(com.facebook.presto.sql.tree.IsNullPredicate) MaterializedViewStatus(com.facebook.presto.spi.MaterializedViewStatus)

Example 42 with TupleDomain

use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.

the class TestOrcBloomFilters method testMatches.

/**
 * Simulates a query on two columns where one is used as part of the where clause,
 * with and without a bloom filter.
 */
@Test
public void testMatches() {
    // Predicates under test: "COLUMN_0 = 1234" and an unconstrained one.
    Domain singleValueDomain = Domain.singleValue(BIGINT, 1234L);
    TupleDomain.ColumnDomain<String> filteredColumn = new TupleDomain.ColumnDomain<>(COLUMN_0, singleValueDomain);
    TupleDomain<String> equalsPredicate = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(filteredColumn)));
    TupleDomain<String> allPredicate = TupleDomain.all();

    // Both columns are visible to the predicate, only the first one is constrained.
    List<ColumnReference<String>> references = ImmutableList.<ColumnReference<String>>of(
            new ColumnReference<>(COLUMN_0, 0, BIGINT),
            new ColumnReference<>(COLUMN_1, 1, BIGINT));
    TupleDomainOrcPredicate<String> predicate = new TupleDomainOrcPredicate<>(equalsPredicate, references, true, Optional.empty());
    TupleDomainOrcPredicate<String> emptyPredicate = new TupleDomainOrcPredicate<>(allPredicate, references, true, Optional.empty());

    // Snapshot the bloom filter while it is still empty, then again after 1234 has been added.
    HiveBloomFilter bloomFilter = new HiveBloomFilter(new BloomFilter(1000, 0.01));
    OrcProto.BloomFilter withoutValue = toOrcBloomFilter(bloomFilter);
    bloomFilter.addLong(1234);
    OrcProto.BloomFilter withValue = toOrcBloomFilter(bloomFilter);

    // Column statistics for the three scenarios; min/max always cover the searched value.
    Map<Integer, ColumnStatistics> matchingStats = ImmutableMap.of(0, new IntegerColumnStatistics(null, toHiveBloomFilter(withValue), new IntegerStatistics(10L, 2000L, null)));
    Map<Integer, ColumnStatistics> nonMatchingStats = ImmutableMap.of(0, new IntegerColumnStatistics(null, toHiveBloomFilter(withoutValue), new IntegerStatistics(10L, 2000L, null)));
    Map<Integer, ColumnStatistics> noBloomFilterStats = ImmutableMap.of(0, new IntegerColumnStatistics(null, null, new IntegerStatistics(10L, 2000L, null)));

    assertTrue(predicate.matches(1L, matchingStats));
    assertTrue(predicate.matches(1L, noBloomFilterStats));
    assertFalse(predicate.matches(1L, nonMatchingStats));
    assertTrue(emptyPredicate.matches(1L, matchingStats));
}
Also used : IntegerColumnStatistics(com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics) ColumnStatistics(com.facebook.presto.orc.metadata.statistics.ColumnStatistics) OrcProto(com.facebook.presto.orc.proto.OrcProto) IntegerColumnStatistics(com.facebook.presto.orc.metadata.statistics.IntegerColumnStatistics) HiveBloomFilter(com.facebook.presto.orc.metadata.statistics.HiveBloomFilter) TupleDomainOrcPredicate.checkInBloomFilter(com.facebook.presto.orc.TupleDomainOrcPredicate.checkInBloomFilter) BloomFilter(com.facebook.presto.orc.metadata.statistics.BloomFilter) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) HiveBloomFilter(com.facebook.presto.orc.metadata.statistics.HiveBloomFilter) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ColumnReference(com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference) IntegerStatistics(com.facebook.presto.orc.metadata.statistics.IntegerStatistics) Test(org.testng.annotations.Test)

Example 43 with TupleDomain

use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.

the class CassandraPartitionManager method getPartitions.

/**
 * Resolves the Cassandra partitions that overlap the given constraint, together with the part of
 * the constraint that is returned alongside them.
 *
 * <p>When the only resulting partition is the unpartitioned one, a single-value predicate on one
 * indexed column is additionally folded into that partition's id.
 *
 * @param tableHandle handle of the table, expected to be a {@code CassandraTableHandle}
 * @param tupleDomain constraint to evaluate against the table's partitions
 * @return the selected partitions and the remaining tuple domain
 */
public CassandraPartitionResult getPartitions(ConnectorTableHandle tableHandle, TupleDomain<ColumnHandle> tupleDomain) {
    CassandraTableHandle handle = (CassandraTableHandle) tableHandle;
    CassandraTable cassandraTable = cassandraSession.getTable(handle.getSchemaTableName());
    List<CassandraColumnHandle> partitionKeys = cassandraTable.getPartitionKeyColumns();

    // fetch the partitions
    List<CassandraPartition> candidates = getCassandraPartitions(cassandraTable, tupleDomain);
    log.debug("%s.%s #partitions: %d", handle.getSchemaName(), handle.getTableName(), candidates.size());

    // do a final pass to filter based on fields that could not be used to build the prefix
    List<CassandraPartition> partitions = new ArrayList<>();
    for (CassandraPartition candidate : candidates) {
        if (tupleDomain.overlaps(candidate.getTupleDomain())) {
            partitions.add(candidate);
        }
    }

    boolean singleUnpartitioned = partitions.size() == 1 && partitions.get(0).isUnpartitioned();

    // All partition key domains will be fully evaluated, so we don't need to include those
    TupleDomain<ColumnHandle> remainingTupleDomain;
    if (tupleDomain.isNone()) {
        remainingTupleDomain = TupleDomain.none();
    } else if (singleUnpartitioned) {
        remainingTupleDomain = tupleDomain;
    } else {
        @SuppressWarnings({ "rawtypes", "unchecked" }) List<ColumnHandle> partitionColumns = (List) partitionKeys;
        remainingTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(tupleDomain.getDomains().get(), not(in(partitionColumns))));
    }

    // push down indexed column fixed value predicates only for unpartitioned partition which uses token range query
    if (!singleUnpartitioned) {
        return new CassandraPartitionResult(partitions, remainingTupleDomain);
    }

    // compose partitionId by using indexed column
    CassandraColumnHandle pushedColumn = null;
    String partitionId = null;
    for (Map.Entry<ColumnHandle, Domain> entry : tupleDomain.getDomains().get().entrySet()) {
        CassandraColumnHandle column = (CassandraColumnHandle) entry.getKey();
        Domain domain = entry.getValue();
        if (!column.isIndexed() || !domain.isSingleValue()) {
            continue;
        }
        partitionId = CassandraCqlUtils.validColumnName(column.getName()) + " = " + CassandraCqlUtils.cqlValue(toCQLCompatibleString(domain.getSingleValue()), column.getCassandraType());
        pushedColumn = column;
        // Only one indexed column predicate can be pushed down.
        break;
    }
    if (pushedColumn == null) {
        return new CassandraPartitionResult(partitions, remainingTupleDomain);
    }

    // The pushed-down column no longer needs to be part of the remaining domain.
    List<ColumnHandle> indexedColumns = new ArrayList<>();
    indexedColumns.add(pushedColumn);
    CassandraPartition unpartitioned = partitions.get(0);
    TupleDomain<ColumnHandle> filterIndexedColumn = TupleDomain.withColumnDomains(Maps.filterKeys(remainingTupleDomain.getDomains().get(), not(in(indexedColumns))));
    List<CassandraPartition> rewritten = new ArrayList<>();
    rewritten.add(new CassandraPartition(unpartitioned.getKey(), partitionId, filterIndexedColumn, true));
    return new CassandraPartitionResult(rewritten, filterIndexedColumn);
}
Also used : Logger(com.facebook.airlift.log.Logger) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) CassandraCqlUtils.toCQLCompatibleString(com.facebook.presto.cassandra.util.CassandraCqlUtils.toCQLCompatibleString) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) Maps(com.google.common.collect.Maps) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) Sets(com.google.common.collect.Sets) Range(com.facebook.presto.common.predicate.Range) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) List(java.util.List) Predicates.in(com.google.common.base.Predicates.in) Collectors.toList(java.util.stream.Collectors.toList) ImmutableList(com.google.common.collect.ImmutableList) Predicates.not(com.google.common.base.Predicates.not) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) CassandraCqlUtils(com.facebook.presto.cassandra.util.CassandraCqlUtils) ColumnHandle(com.facebook.presto.spi.ColumnHandle) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) ImmutableList(com.google.common.collect.ImmutableList) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) Map(java.util.Map)

Example 44 with TupleDomain

use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.

the class CassandraClusteringPredicatesExtractor method getClusteringKeysSet.

/**
 * Builds the CQL restriction that can be pushed down for a table's clustering columns.
 *
 * <p>Clustering columns are visited in the given order. Processing stops at the first column that
 * has no domain in {@code predicates}, whose domain allows null, or whose domain cannot be
 * expressed as a single CQL restriction. A column restricted by a range comparison is included
 * but ends the processing, as does an {@code IN} restriction on a non-last column for Cassandra
 * versions below 2.2.0 (which is not included).
 *
 * <p>A column's domain is expressible when it is one single value ({@code =}), several single
 * values ({@code IN}), or exactly one bounded range. A union of several ranges, or a mix of single
 * values and ranges, is not pushed down.
 *
 * @param clusteringColumns clustering columns of the table, in the order they are to be visited
 * @param predicates constraint to translate; its domains are looked up by column handle
 * @param cassandraVersion version of the Cassandra cluster
 * @return the domains that were translated and the corresponding restrictions joined with {@code AND}
 */
private static ClusteringPushDownResult getClusteringKeysSet(List<CassandraColumnHandle> clusteringColumns, TupleDomain<ColumnHandle> predicates, VersionNumber cassandraVersion) {
    ImmutableMap.Builder<ColumnHandle, Domain> domainsBuilder = ImmutableMap.builder();
    ImmutableList.Builder<String> clusteringColumnSql = ImmutableList.builder();
    // Constant threshold, parsed once instead of on every loop iteration.
    VersionNumber inOnAnyColumnSince = VersionNumber.parse("2.2.0");
    int currentClusteringColumn = 0;
    for (CassandraColumnHandle columnHandle : clusteringColumns) {
        Domain domain = predicates.getDomains().get().get(columnHandle);
        if (domain == null) {
            break;
        }
        if (domain.isNullAllowed()) {
            break;
        }
        String predicateString = domain.getValues().getValuesProcessor().transform(ranges -> {
            List<Object> singleValues = new ArrayList<>();
            List<String> rangeConjuncts = new ArrayList<>();
            String predicate = null;
            for (Range range : ranges.getOrderedRanges()) {
                if (range.isAll()) {
                    return null;
                }
                if (range.isSingleValue()) {
                    singleValues.add(CassandraCqlUtils.cqlValue(toCQLCompatibleString(range.getSingleValue()), columnHandle.getCassandraType()));
                } else {
                    // Every non-single range that is not "all" adds at least one bound below, so a
                    // non-empty list here means this is a second range. The bounds are joined with
                    // AND, which would turn a union such as "c < 5 OR c > 10" into the empty set
                    // "c < 5 AND c > 10"; such a domain must not be pushed down.
                    if (!rangeConjuncts.isEmpty()) {
                        return null;
                    }
                    if (!range.isLowUnbounded()) {
                        rangeConjuncts.add(format("%s %s %s", CassandraCqlUtils.validColumnName(columnHandle.getName()), range.isLowInclusive() ? ">=" : ">", CassandraCqlUtils.cqlValue(toCQLCompatibleString(range.getLowBoundedValue()), columnHandle.getCassandraType())));
                    }
                    if (!range.isHighUnbounded()) {
                        rangeConjuncts.add(format("%s %s %s", CassandraCqlUtils.validColumnName(columnHandle.getName()), range.isHighInclusive() ? "<=" : "<", CassandraCqlUtils.cqlValue(toCQLCompatibleString(range.getHighBoundedValue()), columnHandle.getCassandraType())));
                    }
                }
            }
            // A mix of single values and a range cannot be written as one restriction.
            if (!singleValues.isEmpty() && !rangeConjuncts.isEmpty()) {
                return null;
            }
            if (!singleValues.isEmpty()) {
                if (singleValues.size() == 1) {
                    predicate = CassandraCqlUtils.validColumnName(columnHandle.getName()) + " = " + singleValues.get(0);
                } else {
                    predicate = CassandraCqlUtils.validColumnName(columnHandle.getName()) + " IN (" + Joiner.on(",").join(singleValues) + ")";
                }
            } else if (!rangeConjuncts.isEmpty()) {
                predicate = Joiner.on(" AND ").join(rangeConjuncts);
            }
            return predicate;
        }, discreteValues -> {
            if (discreteValues.isWhiteList()) {
                ImmutableList.Builder<Object> discreteValuesList = ImmutableList.builder();
                for (Object discreteValue : discreteValues.getValues()) {
                    discreteValuesList.add(CassandraCqlUtils.cqlValue(toCQLCompatibleString(discreteValue), columnHandle.getCassandraType()));
                }
                String predicate = CassandraCqlUtils.validColumnName(columnHandle.getName()) + " IN (" + Joiner.on(",").join(discreteValuesList.build()) + ")";
                return predicate;
            }
            return null;
        }, allOrNone -> null);
        if (predicateString == null) {
            break;
        }
        // IN restriction only on last clustering column for Cassandra version = 2.1
        if (predicateString.contains(" IN (") && cassandraVersion.compareTo(inOnAnyColumnSince) < 0 && currentClusteringColumn != (clusteringColumns.size() - 1)) {
            break;
        }
        clusteringColumnSql.add(predicateString);
        domainsBuilder.put(columnHandle, domain);
        // Check for last clustering column should only be restricted by range condition
        if (predicateString.contains(">") || predicateString.contains("<")) {
            break;
        }
        currentClusteringColumn++;
    }
    List<String> clusteringColumnPredicates = clusteringColumnSql.build();
    return new ClusteringPushDownResult(domainsBuilder.build(), Joiner.on(" AND ").join(clusteringColumnPredicates));
}
Also used : ImmutableMap(com.google.common.collect.ImmutableMap) CassandraCqlUtils.toCQLCompatibleString(com.facebook.presto.cassandra.util.CassandraCqlUtils.toCQLCompatibleString) HashMap(java.util.HashMap) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) String.format(java.lang.String.format) Range(com.facebook.presto.common.predicate.Range) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) CassandraCqlUtils(com.facebook.presto.cassandra.util.CassandraCqlUtils) VersionNumber(com.datastax.driver.core.VersionNumber) Joiner(com.google.common.base.Joiner) ColumnHandle(com.facebook.presto.spi.ColumnHandle) ImmutableList(com.google.common.collect.ImmutableList) CassandraCqlUtils.toCQLCompatibleString(com.facebook.presto.cassandra.util.CassandraCqlUtils.toCQLCompatibleString) Range(com.facebook.presto.common.predicate.Range) ImmutableMap(com.google.common.collect.ImmutableMap) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain)

Example 45 with TupleDomain

use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.

the class IcebergPageSourceProvider method getParquetTupleDomain.

/**
 * Converts a predicate over Iceberg columns into one keyed by Parquet column descriptors.
 *
 * <p>Columns whose base type is MAP, ARRAY or ROW are left out, as are columns for which
 * {@code descriptorsByPath} has no descriptor under the single-element path made of the column name.
 *
 * @param descriptorsByPath Parquet column descriptors keyed by column path
 * @param effectivePredicate predicate expressed on Iceberg column handles
 * @return {@code TupleDomain.none()} if the input is none, otherwise the translated domains
 */
private static TupleDomain<ColumnDescriptor> getParquetTupleDomain(Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<IcebergColumnHandle> effectivePredicate) {
    if (effectivePredicate.isNone()) {
        return TupleDomain.none();
    }
    ImmutableMap.Builder<ColumnDescriptor, Domain> parquetDomains = ImmutableMap.builder();
    for (Map.Entry<IcebergColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
        IcebergColumnHandle column = entry.getKey();
        String baseType = column.getType().getTypeSignature().getBase();
        // skip looking up predicates for complex types as Parquet only stores stats for primitives
        boolean complexType = baseType.equals(StandardTypes.MAP) || baseType.equals(StandardTypes.ARRAY) || baseType.equals(StandardTypes.ROW);
        if (complexType) {
            continue;
        }
        RichColumnDescriptor descriptor = descriptorsByPath.get(ImmutableList.of(column.getName()));
        if (descriptor == null) {
            continue;
        }
        parquetDomains.put(descriptor, entry.getValue());
    }
    return TupleDomain.withColumnDomains(parquetDomains.build());
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap)

Aggregations

TupleDomain (com.facebook.presto.common.predicate.TupleDomain)76 Domain (com.facebook.presto.common.predicate.Domain)54 Map (java.util.Map)39 ColumnHandle (com.facebook.presto.spi.ColumnHandle)36 ImmutableList (com.google.common.collect.ImmutableList)33 ImmutableMap (com.google.common.collect.ImmutableMap)31 List (java.util.List)27 Optional (java.util.Optional)26 Objects.requireNonNull (java.util.Objects.requireNonNull)25 Test (org.testng.annotations.Test)25 ConnectorSession (com.facebook.presto.spi.ConnectorSession)22 SchemaTableName (com.facebook.presto.spi.SchemaTableName)22 Set (java.util.Set)21 Type (com.facebook.presto.common.type.Type)20 PrestoException (com.facebook.presto.spi.PrestoException)20 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)19 ImmutableSet (com.google.common.collect.ImmutableSet)19 String.format (java.lang.String.format)19 NullableValue (com.facebook.presto.common.predicate.NullableValue)17 TypeManager (com.facebook.presto.common.type.TypeManager)15