Use of com.facebook.presto.common.predicate.Range in project presto by prestodb.
Class CassandraPartitionManager, method getPartitions.
/**
 * Resolves the Cassandra partitions that can match {@code tupleDomain} for the given table and
 * computes the part of the predicate that is reported back as remaining.
 *
 * <p>When the only surviving partition is the unpartitioned one, the first indexed column that is
 * constrained to a single value is additionally turned into a CQL equality predicate and removed
 * from the remaining predicate.
 *
 * @param tableHandle handle of the table; cast to {@link CassandraTableHandle}
 * @param tupleDomain predicate to evaluate against the partitions
 * @return the matching partitions together with the remaining tuple domain
 */
public CassandraPartitionResult getPartitions(ConnectorTableHandle tableHandle, TupleDomain<ColumnHandle> tupleDomain) {
    CassandraTableHandle handle = (CassandraTableHandle) tableHandle;
    CassandraTable cassandraTable = cassandraSession.getTable(handle.getSchemaTableName());
    List<CassandraColumnHandle> partitionKeys = cassandraTable.getPartitionKeyColumns();

    // Fetch the candidate partitions.
    List<CassandraPartition> candidates = getCassandraPartitions(cassandraTable, tupleDomain);
    log.debug("%s.%s #partitions: %d", handle.getSchemaName(), handle.getTableName(), candidates.size());

    // Final pass: drop candidates that cannot match because of fields that were not usable for the prefix.
    List<CassandraPartition> partitions = candidates.stream()
            .filter(candidate -> tupleDomain.overlaps(candidate.getTupleDomain()))
            .collect(toList());

    boolean singleUnpartitioned = partitions.size() == 1 && partitions.get(0).isUnpartitioned();

    // Partition key domains are fully evaluated, so they are left out of the remaining predicate
    // unless the table is being read through the single unpartitioned partition.
    TupleDomain<ColumnHandle> remainingTupleDomain;
    if (tupleDomain.isNone()) {
        remainingTupleDomain = TupleDomain.none();
    } else if (singleUnpartitioned) {
        remainingTupleDomain = tupleDomain;
    } else {
        @SuppressWarnings({ "rawtypes", "unchecked" })
        List<ColumnHandle> partitionColumns = (List) partitionKeys;
        remainingTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(tupleDomain.getDomains().get(), not(in(partitionColumns))));
    }

    // Indexed-column push down applies only to the unpartitioned partition, which uses a token range query.
    if (!singleUnpartitioned) {
        return new CassandraPartitionResult(partitions, remainingTupleDomain);
    }

    // Look for the first indexed column fixed to a single value; only one such predicate can be pushed down.
    CassandraColumnHandle pushedColumn = null;
    String pushedPredicate = null;
    for (Map.Entry<ColumnHandle, Domain> constraint : tupleDomain.getDomains().get().entrySet()) {
        CassandraColumnHandle candidateColumn = (CassandraColumnHandle) constraint.getKey();
        Domain candidateDomain = constraint.getValue();
        if (candidateColumn.isIndexed() && candidateDomain.isSingleValue()) {
            pushedPredicate = CassandraCqlUtils.validColumnName(candidateColumn.getName())
                    + " = "
                    + CassandraCqlUtils.cqlValue(toCQLCompatibleString(candidateDomain.getSingleValue()), candidateColumn.getCassandraType());
            pushedColumn = candidateColumn;
            break;
        }
    }
    if (pushedColumn == null) {
        return new CassandraPartitionResult(partitions, remainingTupleDomain);
    }

    // The pushed-down column no longer needs to be checked by the engine.
    List<ColumnHandle> indexedColumns = new ArrayList<>();
    indexedColumns.add(pushedColumn);
    TupleDomain<ColumnHandle> withoutIndexedColumn = TupleDomain.withColumnDomains(Maps.filterKeys(remainingTupleDomain.getDomains().get(), not(in(indexedColumns))));

    // Replace the unpartitioned partition by one whose partition id is the indexed-column predicate.
    CassandraPartition unpartitioned = partitions.get(0);
    List<CassandraPartition> rewritten = new ArrayList<>();
    rewritten.add(new CassandraPartition(unpartitioned.getKey(), pushedPredicate, withoutIndexedColumn, true));
    return new CassandraPartitionResult(rewritten, withoutIndexedColumn);
}
Use of com.facebook.presto.common.predicate.Range in project presto by prestodb.
Class CassandraClusteringPredicatesExtractor, method getClusteringKeysSet.
/**
 * Builds the CQL restriction that can be pushed down for the leading clustering columns.
 *
 * <p>Clustering columns are visited in order. Processing stops at the first column that has no
 * domain, allows NULL, or whose domain cannot be expressed as a single CQL restriction. For each
 * accepted column the CQL fragment is collected and its domain is recorded in the result.
 *
 * <p>A domain is translated as follows:
 * <ul>
 * <li>only single values: {@code col = v} or {@code col IN (v1,v2,...)};</li>
 * <li>exactly one non-single range: its lower/upper bounds joined with {@code AND};</li>
 * <li>anything else (a mix of single values and ranges, or several ranges): not pushed down.</li>
 * </ul>
 *
 * @param clusteringColumns clustering columns in clustering order
 * @param predicates predicate to translate; its domains must be present when
 *        {@code clusteringColumns} is non-empty
 * @param cassandraVersion Cassandra version, used to decide where {@code IN} is permitted
 * @return the domains that were pushed down and the CQL predicates joined with {@code " AND "}
 */
private static ClusteringPushDownResult getClusteringKeysSet(List<CassandraColumnHandle> clusteringColumns, TupleDomain<ColumnHandle> predicates, VersionNumber cassandraVersion) {
    ImmutableMap.Builder<ColumnHandle, Domain> domainsBuilder = ImmutableMap.builder();
    ImmutableList.Builder<String> clusteringColumnSql = ImmutableList.builder();
    int currentClusteringColumn = 0;
    for (CassandraColumnHandle columnHandle : clusteringColumns) {
        Domain domain = predicates.getDomains().get().get(columnHandle);
        // Stop at the first clustering column that is unconstrained or may be NULL.
        if (domain == null || domain.isNullAllowed()) {
            break;
        }
        String predicateString = domain.getValues().getValuesProcessor().transform(ranges -> {
            List<Object> singleValues = new ArrayList<>();
            List<String> rangeConjuncts = new ArrayList<>();
            int nonSingleRanges = 0;
            for (Range range : ranges.getOrderedRanges()) {
                if (range.isAll()) {
                    return null;
                }
                if (range.isSingleValue()) {
                    singleValues.add(CassandraCqlUtils.cqlValue(toCQLCompatibleString(range.getSingleValue()), columnHandle.getCassandraType()));
                    continue;
                }
                nonSingleRanges++;
                if (nonSingleRanges > 1) {
                    // The ordered ranges are alternatives (the value set is their union). Joining the
                    // bounds of two different ranges with AND would produce a restriction that is
                    // narrower than the domain (typically unsatisfiable), so such a domain is not
                    // pushed down and stays with the engine.
                    return null;
                }
                if (!range.isLowUnbounded()) {
                    rangeConjuncts.add(format("%s %s %s", CassandraCqlUtils.validColumnName(columnHandle.getName()), range.isLowInclusive() ? ">=" : ">", CassandraCqlUtils.cqlValue(toCQLCompatibleString(range.getLowBoundedValue()), columnHandle.getCassandraType())));
                }
                if (!range.isHighUnbounded()) {
                    rangeConjuncts.add(format("%s %s %s", CassandraCqlUtils.validColumnName(columnHandle.getName()), range.isHighInclusive() ? "<=" : "<", CassandraCqlUtils.cqlValue(toCQLCompatibleString(range.getHighBoundedValue()), columnHandle.getCassandraType())));
                }
            }
            // A mix of equalities and a range cannot be written as one restriction.
            if (!singleValues.isEmpty() && !rangeConjuncts.isEmpty()) {
                return null;
            }
            if (singleValues.size() == 1) {
                return CassandraCqlUtils.validColumnName(columnHandle.getName()) + " = " + singleValues.get(0);
            }
            if (!singleValues.isEmpty()) {
                return CassandraCqlUtils.validColumnName(columnHandle.getName()) + " IN (" + Joiner.on(",").join(singleValues) + ")";
            }
            if (!rangeConjuncts.isEmpty()) {
                // Bounds of the single remaining range.
                return Joiner.on(" AND ").join(rangeConjuncts);
            }
            return null;
        }, discreteValues -> {
            // Only an explicit list of allowed values can be written as IN (...).
            if (!discreteValues.isWhiteList()) {
                return null;
            }
            ImmutableList.Builder<Object> discreteValuesList = ImmutableList.builder();
            for (Object discreteValue : discreteValues.getValues()) {
                discreteValuesList.add(CassandraCqlUtils.cqlValue(toCQLCompatibleString(discreteValue), columnHandle.getCassandraType()));
            }
            return CassandraCqlUtils.validColumnName(columnHandle.getName()) + " IN (" + Joiner.on(",").join(discreteValuesList.build()) + ")";
        }, allOrNone -> null);
        if (predicateString == null) {
            break;
        }
        // Before Cassandra 2.2.0 an IN restriction is pushed only on the last clustering column.
        // NOTE(review): the kind of predicate is detected by substring, so a quoted value that
        // itself contains " IN (", ">" or "<" makes these checks stop early; that only reduces
        // what is pushed down.
        if (predicateString.contains(" IN (") && cassandraVersion.compareTo(VersionNumber.parse("2.2.0")) < 0 && currentClusteringColumn != (clusteringColumns.size() - 1)) {
            break;
        }
        clusteringColumnSql.add(predicateString);
        domainsBuilder.put(columnHandle, domain);
        // A range restriction must be the last restricted clustering column.
        if (predicateString.contains(">") || predicateString.contains("<")) {
            break;
        }
        currentClusteringColumn++;
    }
    List<String> clusteringColumnPredicates = clusteringColumnSql.build();
    return new ClusteringPushDownResult(domainsBuilder.build(), Joiner.on(" AND ").join(clusteringColumnPredicates));
}
Use of com.facebook.presto.common.predicate.Range in project presto by prestodb.
Class TupleDomainParquetPredicate, method createDomain.
/**
 * Builds a domain from per-page minimum/maximum values.
 *
 * <p>Falls back to an unconstrained value set (keeping {@code hasNullValue}) when the lists are
 * empty or of different sizes, when a minimum is greater than its maximum, when long statistics
 * overflow the type, or when a double bound is NaN. Pages whose minimum is not a {@code Long},
 * {@code Double} or {@code Slice} contribute no range.
 *
 * @param type type of the column
 * @param columnIndex column index; only the number of its minimum values is read here
 * @param hasNullValue whether the resulting domain allows NULL
 * @param mins per-page minimum values
 * @param maxs per-page maximum values
 * @return the resulting domain
 * @throws IllegalArgumentException if no range could be built
 */
private static <T extends Comparable<T>> Domain createDomain(Type type, ColumnIndex columnIndex, boolean hasNullValue, List<T> mins, List<T> maxs) {
    boolean statisticsUnusable = mins.isEmpty() || maxs.isEmpty() || mins.size() != maxs.size();
    if (statisticsUnusable) {
        return Domain.create(ValueSet.all(type), hasNullValue);
    }

    // NOTE(review): the loop bound comes from the column index, not from mins/maxs; this presumes
    // both lists hold one entry per page - confirm against the caller.
    int pages = columnIndex.getMinValues().size();
    List<Range> pageRanges = new ArrayList<>();
    for (int page = 0; page < pages; page++) {
        T low = mins.get(page);
        T high = maxs.get(page);
        if (low.compareTo(high) > 0) {
            // Inverted bounds: the statistics cannot be trusted.
            return Domain.create(ValueSet.all(type), hasNullValue);
        }

        if (low instanceof Long) {
            if (isStatisticsOverflow(type, asLong(low), asLong(high))) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
        } else if (low instanceof Double) {
            if (((Double) low).isNaN() || ((Double) high).isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
        } else if (!(low instanceof Slice)) {
            // Unsupported value class: no range for this page.
            continue;
        }
        pageRanges.add(Range.range(type, low, true, high, true));
    }

    checkArgument(!pageRanges.isEmpty(), "cannot use empty ranges");
    return Domain.create(ValueSet.ofRanges(pageRanges), hasNullValue);
}
Use of com.facebook.presto.common.predicate.Range in project presto by prestodb.
Class TupleDomainParquetPredicate, method getDomain.
/**
 * Builds a domain from pairs of statistics: element {@code i} of {@code minimums} and element
 * {@code i} of {@code maximums} describe one closed range. Both lists must have the same size.
 *
 * <p>Supported types are BOOLEAN, the integer types (BIGINT, TINYINT, SMALLINT, INTEGER), REAL,
 * DOUBLE, varchar types and DATE. Any other type, overflowing integer/date statistics, or a NaN
 * bound for REAL/DOUBLE yields an unconstrained value set that keeps {@code hasNullValue}.
 *
 * @param column column descriptor (not read by this method)
 * @param type type of the column
 * @param minimums minimum values, one per statistics entry
 * @param maximums maximum values, one per statistics entry
 * @param hasNullValue whether the resulting domain allows NULL
 * @return the resulting domain
 * @throws IllegalArgumentException if the lists differ in size or no range could be built
 * @throws VerifyException if a BOOLEAN column has neither true nor false statistics
 */
private static Domain getDomain(ColumnDescriptor column, Type type, List<Object> minimums, List<Object> maximums, boolean hasNullValue) {
    checkArgument(minimums.size() == maximums.size(), "Expected minimums and maximums to have the same size");
    int entries = minimums.size();
    List<Range> ranges = new ArrayList<>();

    if (type.equals(BOOLEAN)) {
        boolean sawTrue = minimums.stream().anyMatch(flag -> (boolean) flag) || maximums.stream().anyMatch(flag -> (boolean) flag);
        boolean sawFalse = minimums.stream().anyMatch(flag -> !(boolean) flag) || maximums.stream().anyMatch(flag -> !(boolean) flag);
        if (sawTrue && sawFalse) {
            return Domain.all(type);
        }
        if (!sawTrue && !sawFalse) {
            // All nulls case is handled earlier
            throw new VerifyException("Impossible boolean statistics");
        }
        // Exactly one of the two flags is set, so sawTrue is the single value present.
        return Domain.create(ValueSet.of(type, sawTrue), hasNullValue);
    }

    boolean longBacked = type.equals(BIGINT) || type.equals(TINYINT) || type.equals(SMALLINT) || type.equals(INTEGER) || type.equals(DATE);
    if (longBacked) {
        for (int entry = 0; entry < entries; entry++) {
            long low = asLong(minimums.get(entry));
            long high = asLong(maximums.get(entry));
            if (isStatisticsOverflow(type, low, high)) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, low, true, high, true));
        }
    } else if (type.equals(REAL)) {
        for (int entry = 0; entry < entries; entry++) {
            Float low = (Float) minimums.get(entry);
            Float high = (Float) maximums.get(entry);
            if (low.isNaN() || high.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            // Bounds are passed as the raw int bits of the float, widened to long.
            long lowBits = floatToRawIntBits(low);
            long highBits = floatToRawIntBits(high);
            ranges.add(Range.range(type, lowBits, true, highBits, true));
        }
    } else if (type.equals(DOUBLE)) {
        for (int entry = 0; entry < entries; entry++) {
            Double low = (Double) minimums.get(entry);
            Double high = (Double) maximums.get(entry);
            if (low.isNaN() || high.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            ranges.add(Range.range(type, low, true, high, true));
        }
    } else if (isVarcharType(type)) {
        for (int entry = 0; entry < entries; entry++) {
            Binary lowBinary = (Binary) minimums.get(entry);
            Slice low = Slices.wrappedBuffer(lowBinary.toByteBuffer());
            Binary highBinary = (Binary) maximums.get(entry);
            Slice high = Slices.wrappedBuffer(highBinary.toByteBuffer());
            ranges.add(Range.range(type, low, true, high, true));
        }
    } else {
        // Unsupported type: no restriction can be derived.
        return Domain.create(ValueSet.all(type), hasNullValue);
    }

    checkArgument(!ranges.isEmpty(), "cannot use empty ranges");
    return Domain.create(ValueSet.ofRanges(ranges), hasNullValue);
}
Use of com.facebook.presto.common.predicate.Range in project presto by prestodb.
Class TestHiveSplitManager, method testPartitionStatsBasedOptimizationForDouble.
/**
 * Checks the redundant column domains reported for a DOUBLE column with the predicate
 * {@code [10.0, 20.0]} under different partition statistics.
 */
@Test
public void testPartitionStatsBasedOptimizationForDouble() throws Exception {
    Type doubleType = DOUBLE;
    HiveColumnHandle doubleColumn = new HiveColumnHandle("t_double", HIVE_DOUBLE, doubleType.getTypeSignature(), 0, REGULAR, Optional.empty(), Optional.empty());
    Range predicateRange = range(doubleType, 10.0, true, 20.0, true);
    String columnName = doubleColumn.getName();

    // No partition statistics available.
    assertRedundantColumnDomains(
            predicateRange,
            PartitionStatistics.empty(),
            ImmutableList.of(ImmutableSet.of()),
            doubleColumn);

    // Statistics [5.0, 25.0]: partition left unchanged.
    assertRedundantColumnDomains(
            predicateRange,
            createDoublePartitionStatistics(5.0, 25.0, columnName),
            ImmutableList.of(ImmutableSet.of()),
            doubleColumn);

    // Statistics [1.0, 3.0]: partition is pruned.
    assertRedundantColumnDomains(
            predicateRange,
            createDoublePartitionStatistics(1.0, 3.0, columnName),
            ImmutableList.of(),
            doubleColumn);

    // Statistics [13.0, 15.0]: partition has the column domain stripped.
    assertRedundantColumnDomains(
            predicateRange,
            createDoublePartitionStatistics(13.0, 15.0, columnName),
            ImmutableList.of(ImmutableSet.of(doubleColumn)),
            doubleColumn);
}
Aggregations