Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
From class MaterializedViewUtils, method generateBaseTablePredicates.
/**
 * Builds, for every base table, a filter expression that selects the partitions described by
 * that table's materialized data predicates.
 *
 * <p>Each predicate disjunct is converted into an AND of one term per partition key
 * ({@code key IS NULL} or {@code key = literal}); the disjuncts of one table are then combined
 * with OR. A table that yields no term at all is mapped to {@code FALSE_LITERAL}.
 *
 * @param predicatesFromBaseTables partition predicates keyed by base table
 * @param metadata supplies the block encoding serde used to encode literal values
 * @return a new mutable map from base table to its filter expression
 * @throws IllegalStateException if a disjunct cannot be reduced to fixed values, or if it
 *         carries no value for one of the table's partition keys
 */
public static Map<SchemaTableName, Expression> generateBaseTablePredicates(Map<SchemaTableName, MaterializedViewStatus.MaterializedDataPredicates> predicatesFromBaseTables, Metadata metadata) {
    // The encoder depends on neither the table nor the value, so it is built once up front.
    LiteralEncoder literalEncoder = new LiteralEncoder(metadata.getBlockEncodingSerde());
    Map<SchemaTableName, Expression> baseTablePredicates = new HashMap<>();

    for (Map.Entry<SchemaTableName, MaterializedViewStatus.MaterializedDataPredicates> tableEntry : predicatesFromBaseTables.entrySet()) {
        SchemaTableName baseTable = tableEntry.getKey();
        MaterializedViewStatus.MaterializedDataPredicates predicatesInfo = tableEntry.getValue();

        List<String> partitionKeys = predicatesInfo.getColumnNames();
        // keyExpressions.get(i) is the identifier for partitionKeys.get(i).
        ImmutableList<Expression> keyExpressions = partitionKeys.stream().map(Identifier::new).collect(toImmutableList());

        Expression disjunct = null;
        for (TupleDomain<String> predicateDisjunct : predicatesInfo.getPredicateDisjuncts()) {
            Map<String, NullableValue> predicateKeyValue = extractFixedValues(predicateDisjunct)
                    .orElseThrow(() -> new IllegalStateException("predicateKeyValue is not present!"));

            Expression conjunct = null;
            for (int i = 0; i < partitionKeys.size(); i++) {
                String key = partitionKeys.get(i);
                Expression keyExpression = keyExpressions.get(i);

                NullableValue nullableValue = predicateKeyValue.get(key);
                if (nullableValue == null) {
                    // Fail with a descriptive error instead of a bare NullPointerException.
                    throw new IllegalStateException(String.format("No fixed value for partition key '%s' of table %s", key, baseTable));
                }

                Expression expression;
                if (nullableValue.isNull()) {
                    expression = new IsNullPredicate(keyExpression);
                } else {
                    Expression valueExpression = literalEncoder.toExpression(nullableValue.getValue(), nullableValue.getType(), false);
                    expression = new ComparisonExpression(EQUAL, keyExpression, valueExpression);
                }
                conjunct = conjunct == null ? expression : new LogicalBinaryExpression(AND, conjunct, expression);
            }

            // A disjunct without any term (no partition keys) contributes nothing.
            if (conjunct != null) {
                disjunct = disjunct == null ? conjunct : new LogicalBinaryExpression(OR, disjunct, conjunct);
            }
        }

        // If no (fresh) partitions are found for table, that means we should not select from it
        if (disjunct == null) {
            disjunct = FALSE_LITERAL;
        }
        baseTablePredicates.put(baseTable, disjunct);
    }
    return baseTablePredicates;
}
Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
From class TestOrcBloomFilters, method testMatches.
// Simulates a query over two columns where only one of them is used in the WHERE clause,
// evaluated with a matching bloom filter, a non-matching bloom filter, and no bloom filter.
@Test
public void testMatches() {
    // Predicate under test: COLUMN_0 = 1234, plus an unconstrained predicate for comparison.
    Domain equals1234 = Domain.singleValue(BIGINT, 1234L);
    TupleDomain.ColumnDomain<String> constrainedColumn = new TupleDomain.ColumnDomain<>(COLUMN_0, equals1234);
    TupleDomain<String> constrainedDomain = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(constrainedColumn)));
    TupleDomain<String> unconstrainedDomain = TupleDomain.all();

    // Both columns are visible to the predicate, even though only the first is constrained.
    List<ColumnReference<String>> references = ImmutableList.<ColumnReference<String>>builder()
            .add(new ColumnReference<>(COLUMN_0, 0, BIGINT))
            .add(new ColumnReference<>(COLUMN_1, 1, BIGINT))
            .build();

    TupleDomainOrcPredicate<String> predicate = new TupleDomainOrcPredicate<>(constrainedDomain, references, true, Optional.empty());
    TupleDomainOrcPredicate<String> emptyPredicate = new TupleDomainOrcPredicate<>(unconstrainedDomain, references, true, Optional.empty());

    // Snapshot the filter while it is still empty, then add the value and snapshot it again.
    HiveBloomFilter bloomFilter = new HiveBloomFilter(new BloomFilter(1000, 0.01));
    OrcProto.BloomFilter withoutValue = toOrcBloomFilter(bloomFilter);
    bloomFilter.addLong(1234);
    OrcProto.BloomFilter withValue = toOrcBloomFilter(bloomFilter);

    Map<Integer, ColumnStatistics> matchingStats = ImmutableMap.of(
            0, new IntegerColumnStatistics(null, toHiveBloomFilter(withValue), new IntegerStatistics(10L, 2000L, null)));
    Map<Integer, ColumnStatistics> nonMatchingStats = ImmutableMap.of(
            0, new IntegerColumnStatistics(null, toHiveBloomFilter(withoutValue), new IntegerStatistics(10L, 2000L, null)));
    Map<Integer, ColumnStatistics> statsWithoutBloomFilter = ImmutableMap.of(
            0, new IntegerColumnStatistics(null, null, new IntegerStatistics(10L, 2000L, null)));

    assertTrue(predicate.matches(1L, matchingStats));
    assertTrue(predicate.matches(1L, statsWithoutBloomFilter));
    assertFalse(predicate.matches(1L, nonMatchingStats));
    assertTrue(emptyPredicate.matches(1L, matchingStats));
}
Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
From class CassandraPartitionManager, method getPartitions.
/**
 * Resolves the partitions of a Cassandra table that overlap the given constraint, together
 * with the part of the constraint that is reported back alongside them.
 *
 * <p>When the only surviving partition is the unpartitioned one, a single-value predicate on
 * one indexed column (the first one encountered) is folded into the partition id, and that
 * column is dropped from the returned tuple domain.
 *
 * @param tableHandle handle of the table; must be a {@code CassandraTableHandle}
 * @param tupleDomain constraint to apply
 * @return the selected partitions and the remaining tuple domain
 */
public CassandraPartitionResult getPartitions(ConnectorTableHandle tableHandle, TupleDomain<ColumnHandle> tupleDomain) {
    CassandraTableHandle handle = (CassandraTableHandle) tableHandle;
    CassandraTable table = cassandraSession.getTable(handle.getSchemaTableName());
    List<CassandraColumnHandle> partitionKeys = table.getPartitionKeyColumns();

    // fetch the partitions
    List<CassandraPartition> fetched = getCassandraPartitions(table, tupleDomain);
    log.debug("%s.%s #partitions: %d", handle.getSchemaName(), handle.getTableName(), fetched.size());

    // do a final pass to filter based on fields that could not be used to build the prefix
    List<CassandraPartition> partitions = fetched.stream()
            .filter(candidate -> tupleDomain.overlaps(candidate.getTupleDomain()))
            .collect(toList());

    boolean onlyUnpartitioned = partitions.size() == 1 && partitions.get(0).isUnpartitioned();

    // All partition key domains will be fully evaluated, so we don't need to include those
    TupleDomain<ColumnHandle> remainingTupleDomain;
    if (tupleDomain.isNone()) {
        remainingTupleDomain = TupleDomain.none();
    } else if (onlyUnpartitioned) {
        remainingTupleDomain = tupleDomain;
    } else {
        @SuppressWarnings({ "rawtypes", "unchecked" })
        List<ColumnHandle> partitionColumns = (List) partitionKeys;
        remainingTupleDomain = TupleDomain.withColumnDomains(
                Maps.filterKeys(tupleDomain.getDomains().get(), not(in(partitionColumns))));
    }

    // push down indexed column fixed value predicates only for unpartitioned partition which uses token range query
    if (!onlyUnpartitioned) {
        return new CassandraPartitionResult(partitions, remainingTupleDomain);
    }

    Map<ColumnHandle, Domain> domains = tupleDomain.getDomains().get();
    List<ColumnHandle> indexedColumns = new ArrayList<>();
    // compose partitionId by using indexed column
    String partitionId = null;
    for (Map.Entry<ColumnHandle, Domain> entry : domains.entrySet()) {
        CassandraColumnHandle column = (CassandraColumnHandle) entry.getKey();
        Domain domain = entry.getValue();
        if (!column.isIndexed() || !domain.isSingleValue()) {
            continue;
        }
        partitionId = CassandraCqlUtils.validColumnName(column.getName())
                + " = "
                + CassandraCqlUtils.cqlValue(toCQLCompatibleString(domain.getSingleValue()), column.getCassandraType());
        indexedColumns.add(column);
        // Only one indexed column predicate can be pushed down.
        break;
    }

    if (partitionId == null) {
        return new CassandraPartitionResult(partitions, remainingTupleDomain);
    }

    CassandraPartition unpartitioned = partitions.get(0);
    TupleDomain<ColumnHandle> filterIndexedColumn = TupleDomain.withColumnDomains(
            Maps.filterKeys(remainingTupleDomain.getDomains().get(), not(in(indexedColumns))));
    List<CassandraPartition> indexedPartitions = new ArrayList<>();
    indexedPartitions.add(new CassandraPartition(unpartitioned.getKey(), partitionId, filterIndexedColumn, true));
    return new CassandraPartitionResult(indexedPartitions, filterIndexedColumn);
}
Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
From class CassandraClusteringPredicatesExtractor, method getClusteringKeysSet.
/**
 * Translates the leading clustering-column constraints of {@code predicates} into CQL
 * predicate fragments.
 *
 * <p>Clustering columns are visited in order and the walk stops at the first column that
 * cannot be translated: no domain, NULL allowed, an unbounded range, a mix of single values
 * and ranges, more than one non-single range, or a non-whitelist discrete set. The walk also
 * stops after a column whose predicate text contains {@code <} or {@code >}, and before a
 * non-last column that would need {@code IN} on a Cassandra version below 2.2.0.
 *
 * @param clusteringColumns clustering columns, in clustering order
 * @param predicates constraint to translate; its domains are read per column
 * @param cassandraVersion server version, used to decide whether IN may be used on a
 *        non-last clustering column
 * @return the domains that were translated and the CQL fragments joined with {@code " AND "}
 */
private static ClusteringPushDownResult getClusteringKeysSet(List<CassandraColumnHandle> clusteringColumns, TupleDomain<ColumnHandle> predicates, VersionNumber cassandraVersion) {
    ImmutableMap.Builder<ColumnHandle, Domain> domainsBuilder = ImmutableMap.builder();
    ImmutableList.Builder<String> clusteringColumnSql = ImmutableList.builder();
    int currentClusteringColumn = 0;
    for (CassandraColumnHandle columnHandle : clusteringColumns) {
        Domain domain = predicates.getDomains().get().get(columnHandle);
        if (domain == null) {
            break;
        }
        if (domain.isNullAllowed()) {
            break;
        }
        String predicateString = domain.getValues().getValuesProcessor().transform(ranges -> {
            List<Object> singleValues = new ArrayList<>();
            List<String> rangeConjuncts = new ArrayList<>();
            int nonSingleRanges = 0;
            String predicate = null;
            for (Range range : ranges.getOrderedRanges()) {
                if (range.isAll()) {
                    return null;
                }
                if (range.isSingleValue()) {
                    singleValues.add(CassandraCqlUtils.cqlValue(toCQLCompatibleString(range.getSingleValue()), columnHandle.getCassandraType()));
                } else {
                    // The ranges of a domain are alternatives (OR). Their bounds are joined
                    // with AND below, which is only correct for a single range, so a second
                    // non-single range means this column cannot be translated.
                    nonSingleRanges++;
                    if (nonSingleRanges > 1) {
                        return null;
                    }
                    if (!range.isLowUnbounded()) {
                        rangeConjuncts.add(format("%s %s %s", CassandraCqlUtils.validColumnName(columnHandle.getName()), range.isLowInclusive() ? ">=" : ">", CassandraCqlUtils.cqlValue(toCQLCompatibleString(range.getLowBoundedValue()), columnHandle.getCassandraType())));
                    }
                    if (!range.isHighUnbounded()) {
                        rangeConjuncts.add(format("%s %s %s", CassandraCqlUtils.validColumnName(columnHandle.getName()), range.isHighInclusive() ? "<=" : "<", CassandraCqlUtils.cqlValue(toCQLCompatibleString(range.getHighBoundedValue()), columnHandle.getCassandraType())));
                    }
                }
            }
            // A mix of point values and a range cannot be expressed as one fragment.
            if (!singleValues.isEmpty() && !rangeConjuncts.isEmpty()) {
                return null;
            }
            if (!singleValues.isEmpty()) {
                if (singleValues.size() == 1) {
                    predicate = CassandraCqlUtils.validColumnName(columnHandle.getName()) + " = " + singleValues.get(0);
                } else {
                    predicate = CassandraCqlUtils.validColumnName(columnHandle.getName()) + " IN (" + Joiner.on(",").join(singleValues) + ")";
                }
            } else if (!rangeConjuncts.isEmpty()) {
                predicate = Joiner.on(" AND ").join(rangeConjuncts);
            }
            return predicate;
        }, discreteValues -> {
            if (discreteValues.isWhiteList()) {
                ImmutableList.Builder<Object> discreteValuesList = ImmutableList.builder();
                for (Object discreteValue : discreteValues.getValues()) {
                    discreteValuesList.add(CassandraCqlUtils.cqlValue(toCQLCompatibleString(discreteValue), columnHandle.getCassandraType()));
                }
                String predicate = CassandraCqlUtils.validColumnName(columnHandle.getName()) + " IN (" + Joiner.on(",").join(discreteValuesList.build()) + ")";
                return predicate;
            }
            return null;
        }, allOrNone -> null);
        if (predicateString == null) {
            break;
        }
        // IN restriction only on last clustering column for Cassandra version = 2.1
        if (predicateString.contains(" IN (") && cassandraVersion.compareTo(VersionNumber.parse("2.2.0")) < 0 && currentClusteringColumn != (clusteringColumns.size() - 1)) {
            break;
        }
        clusteringColumnSql.add(predicateString);
        domainsBuilder.put(columnHandle, domain);
        // Check for last clustering column should only be restricted by range condition
        // NOTE(review): this is a textual check, so a literal value containing '<' or '>'
        // also ends the walk; that only reduces what is translated.
        if (predicateString.contains(">") || predicateString.contains("<")) {
            break;
        }
        currentClusteringColumn++;
    }
    List<String> clusteringColumnPredicates = clusteringColumnSql.build();
    return new ClusteringPushDownResult(domainsBuilder.build(), Joiner.on(" AND ").join(clusteringColumnPredicates));
}
Use of com.facebook.presto.common.predicate.TupleDomain in project presto by prestodb.
From class IcebergPageSourceProvider, method getParquetTupleDomain.
/**
 * Re-keys an Iceberg column predicate by Parquet column descriptor.
 *
 * <p>Columns whose base type is MAP, ARRAY or ROW are left out, as are columns for which
 * {@code descriptorsByPath} holds no descriptor under the single-element path made of the
 * column name.
 *
 * @param descriptorsByPath Parquet column descriptors keyed by column path
 * @param effectivePredicate predicate expressed over Iceberg column handles
 * @return the predicate over Parquet descriptors, or {@code TupleDomain.none()} if the input is none
 */
private static TupleDomain<ColumnDescriptor> getParquetTupleDomain(Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<IcebergColumnHandle> effectivePredicate) {
    if (effectivePredicate.isNone()) {
        return TupleDomain.none();
    }

    ImmutableMap.Builder<ColumnDescriptor, Domain> parquetDomains = ImmutableMap.builder();
    for (Map.Entry<IcebergColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
        IcebergColumnHandle column = entry.getKey();
        String baseType = column.getType().getTypeSignature().getBase();

        // skip looking up predicates for complex types as Parquet only stores stats for primitives
        boolean complexType = baseType.equals(StandardTypes.MAP)
                || baseType.equals(StandardTypes.ARRAY)
                || baseType.equals(StandardTypes.ROW);
        if (complexType) {
            continue;
        }

        RichColumnDescriptor descriptor = descriptorsByPath.get(ImmutableList.of(column.getName()));
        if (descriptor == null) {
            continue;
        }
        parquetDomains.put(descriptor, entry.getValue());
    }
    return TupleDomain.withColumnDomains(parquetDomains.build());
}
Aggregations