Use of com.facebook.presto.spi.DiscretePredicates in project presto by prestodb.
Class AbstractTestHiveClient, method assertExpectedTableLayout:
protected void assertExpectedTableLayout(ConnectorTableLayout actualTableLayout, ConnectorTableLayout expectedTableLayout)
{
    assertExpectedTableLayoutHandle(actualTableLayout.getHandle(), expectedTableLayout.getHandle());
    assertEquals(actualTableLayout.getPredicate(), expectedTableLayout.getPredicate());
    assertEquals(actualTableLayout.getDiscretePredicates().isPresent(), expectedTableLayout.getDiscretePredicates().isPresent());
    actualTableLayout.getDiscretePredicates().ifPresent(actual -> {
        DiscretePredicates expected = expectedTableLayout.getDiscretePredicates().get();
        assertEquals(actual.getColumns(), expected.getColumns());
        assertEqualsIgnoreOrder(actual.getPredicates(), expected.getPredicates());
    });
    assertEquals(actualTableLayout.getStreamPartitioningColumns(), expectedTableLayout.getStreamPartitioningColumns());
    assertEquals(actualTableLayout.getLocalProperties(), expectedTableLayout.getLocalProperties());
}
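The helper compares the two layouts field by field. Note that the DiscretePredicates comparison checks the column list for exact equality but compares the predicates ignoring order, since the predicate Iterable carries no meaningful ordering across partitions. A minimal sketch of that distinction (hypothetical fragment; columns, domainA, and domainB are placeholders, and assertEqualsIgnoreOrder is the airlift test assertion used above):

// Two DiscretePredicates enumerating the same domains in a different order
// should be treated as equivalent by the helper above.
List<TupleDomain<ColumnHandle>> forward = ImmutableList.of(domainA, domainB);
List<TupleDomain<ColumnHandle>> reversed = ImmutableList.of(domainB, domainA);
DiscretePredicates left = new DiscretePredicates(columns, forward);
DiscretePredicates right = new DiscretePredicates(columns, reversed);
assertEquals(left.getColumns(), right.getColumns());                  // column order is significant
assertEqualsIgnoreOrder(left.getPredicates(), right.getPredicates()); // predicate order is not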
Use of com.facebook.presto.spi.DiscretePredicates in project presto by prestodb.
Class HiveMetadata, method getTableLayout:
@Override
public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTableLayoutHandle layoutHandle)
{
    HiveTableLayoutHandle hiveLayoutHandle = (HiveTableLayoutHandle) layoutHandle;
    List<ColumnHandle> partitionColumns = ImmutableList.copyOf(hiveLayoutHandle.getPartitionColumns());
    List<HivePartition> partitions = hiveLayoutHandle.getPartitions().get();

    Optional<DiscretePredicates> discretePredicates = Optional.empty();
    if (!partitionColumns.isEmpty()) {
        // Do not create tuple domains for every partition eagerly.
        // There can be a huge number of partitions, so use a lazy Iterable
        // so that all domains do not need to be in memory at the same time.
        Iterable<TupleDomain<ColumnHandle>> partitionDomains = Iterables.transform(
                partitions,
                hivePartition -> TupleDomain.fromFixedValues(hivePartition.getKeys()));
        discretePredicates = Optional.of(new DiscretePredicates(partitionColumns, partitionDomains));
    }

    Optional<ConnectorTablePartitioning> tablePartitioning = Optional.empty();
    SchemaTableName tableName = hiveLayoutHandle.getSchemaTableName();
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Table table = metastore.getTable(metastoreContext, tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    // Never ignore table bucketing for temporary tables, as those are created explicitly by the engine
    boolean bucketExecutionEnabled = table.getTableType().equals(TEMPORARY_TABLE) || isBucketExecutionEnabled(session);
    if (bucketExecutionEnabled && hiveLayoutHandle.getBucketHandle().isPresent()) {
        HiveBucketHandle hiveBucketHandle = hiveLayoutHandle.getBucketHandle().get();
        HivePartitioningHandle partitioningHandle;
        int bucketCount = hiveBucketHandle.getReadBucketCount();
        OptionalInt maxCompatibleBucketCount = OptionalInt.empty();

        // A virtually bucketed table does not have a table bucket property
        if (hiveBucketHandle.isVirtuallyBucketed()) {
            partitioningHandle = createHiveCompatiblePartitioningHandle(
                    bucketCount,
                    hiveBucketHandle.getColumns().stream()
                            .map(HiveColumnHandle::getHiveType)
                            .collect(toImmutableList()),
                    maxCompatibleBucketCount);
        }
        else {
            HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty()
                    .orElseThrow(() -> new IllegalArgumentException("bucketProperty is expected to be present"));
            switch (bucketProperty.getBucketFunctionType()) {
                case HIVE_COMPATIBLE:
                    partitioningHandle = createHiveCompatiblePartitioningHandle(
                            bucketCount,
                            hiveBucketHandle.getColumns().stream()
                                    .map(HiveColumnHandle::getHiveType)
                                    .collect(toImmutableList()),
                            maxCompatibleBucketCount);
                    break;
                case PRESTO_NATIVE:
                    partitioningHandle = createPrestoNativePartitioningHandle(
                            bucketCount,
                            bucketProperty.getTypes().get(),
                            maxCompatibleBucketCount);
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported bucket function type " + bucketProperty.getBucketFunctionType());
            }
        }
        tablePartitioning = Optional.of(new ConnectorTablePartitioning(
                partitioningHandle,
                hiveBucketHandle.getColumns().stream()
                        .map(ColumnHandle.class::cast)
                        .collect(toImmutableList())));
    }

    TupleDomain<ColumnHandle> predicate;
    if (hiveLayoutHandle.isPushdownFilterEnabled()) {
        predicate = hiveLayoutHandle.getDomainPredicate()
                .transform(subfield -> isEntireColumn(subfield) ? subfield.getRootName() : null)
                .transform(hiveLayoutHandle.getPredicateColumns()::get)
                .transform(ColumnHandle.class::cast)
                .intersect(createPredicate(partitionColumns, partitions));
    }
    else {
        predicate = createPredicate(partitionColumns, partitions);
    }

    // Expose the ordering property of the table
    ImmutableList.Builder<LocalProperty<ColumnHandle>> localProperties = ImmutableList.builder();
    Optional<Set<ColumnHandle>> streamPartitionColumns = Optional.empty();
    if (table.getStorage().getBucketProperty().isPresent() && !table.getStorage().getBucketProperty().get().getSortedBy().isEmpty()) {
        ImmutableSet.Builder<ColumnHandle> streamPartitionColumnsBuilder = ImmutableSet.builder();

        // streamPartitioningColumns describes how the data is partitioned across splits;
        // localProperties describes how the data is ordered within a split.
        // 1. Add partition columns to streamPartitionColumns.
        partitionColumns.forEach(streamPartitionColumnsBuilder::add);

        // 2. Add sorted columns to streamPartitionColumns and localProperties.
        HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty().get();
        Map<String, ColumnHandle> columnHandles = hiveColumnHandles(table).stream()
                .collect(toImmutableMap(HiveColumnHandle::getName, identity()));
        bucketProperty.getSortedBy().forEach(sortingColumn -> {
            ColumnHandle columnHandle = columnHandles.get(sortingColumn.getColumnName());
            localProperties.add(new SortingProperty<>(columnHandle, sortingColumn.getOrder().getSortOrder()));
            streamPartitionColumnsBuilder.add(columnHandle);
        });

        // We only set streamPartitionColumns when streaming aggregation is enabled and the table is eligible for it:
        // 1. the bucket columns are the same as the prefix of the sort columns, and
        // 2. all rows of the same value group are guaranteed to be in the same split; file splitting is
        //    disabled when isStreamingAggregationEnabled is true to guarantee this property.
        List<String> sortColumns = bucketProperty.getSortedBy().stream()
                .map(SortingColumn::getColumnName)
                .collect(toImmutableList());
        if (bucketProperty.getBucketedBy().size() <= sortColumns.size()
                && bucketProperty.getBucketedBy().containsAll(sortColumns.subList(0, bucketProperty.getBucketedBy().size()))
                && isStreamingAggregationEnabled(session)) {
            streamPartitionColumns = Optional.of(streamPartitionColumnsBuilder.build());
        }
    }

    return new ConnectorTableLayout(
            hiveLayoutHandle,
            Optional.empty(),
            predicate,
            tablePartitioning,
            streamPartitionColumns,
            discretePredicates,
            localProperties.build(),
            Optional.of(hiveLayoutHandle.getRemainingPredicate()));
}
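The key DiscretePredicates detail here is the lazy construction flagged by the comment: Iterables.transform produces each partition's TupleDomain on demand as the consumer iterates getPredicates(), so a table with a huge number of partitions never needs all domains in memory at once. A minimal sketch of the contrast (hypothetical fragment reusing the partitions and partitionColumns variables from the method above):

// Lazy: each TupleDomain is built only when the consumer iterates over it.
Iterable<TupleDomain<ColumnHandle>> lazyDomains = Iterables.transform(
        partitions,
        partition -> TupleDomain.fromFixedValues(partition.getKeys()));
DiscretePredicates lazy = new DiscretePredicates(partitionColumns, lazyDomains);

// Eager alternative the comment warns against: materializes every domain up front.
List<TupleDomain<ColumnHandle>> eagerDomains = partitions.stream()
        .map(partition -> TupleDomain.fromFixedValues(partition.getKeys()))
        .collect(toImmutableList());
DiscretePredicates eager = new DiscretePredicates(partitionColumns, eagerDomains);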
Use of com.facebook.presto.spi.DiscretePredicates in project presto by prestodb.
Class AbstractTestHiveClient, method setupHive:
protected void setupHive(String connectorId, String databaseName, String timeZoneId)
{
    clientId = connectorId;
    database = databaseName;
    tablePartitionFormat = new SchemaTableName(database, "presto_test_partition_format");
    tableUnpartitioned = new SchemaTableName(database, "presto_test_unpartitioned");
    tableOffline = new SchemaTableName(database, "presto_test_offline");
    tableOfflinePartition = new SchemaTableName(database, "presto_test_offline_partition");
    tableNotReadable = new SchemaTableName(database, "presto_test_not_readable");
    view = new SchemaTableName(database, "presto_test_view");
    invalidTable = new SchemaTableName(database, INVALID_TABLE);
    tableBucketedStringInt = new SchemaTableName(database, "presto_test_bucketed_by_string_int");
    tableBucketedBigintBoolean = new SchemaTableName(database, "presto_test_bucketed_by_bigint_boolean");
    tableBucketedDoubleFloat = new SchemaTableName(database, "presto_test_bucketed_by_double_float");
    tablePartitionSchemaChange = new SchemaTableName(database, "presto_test_partition_schema_change");
    tablePartitionSchemaChangeNonCanonical = new SchemaTableName(database, "presto_test_partition_schema_change_non_canonical");
    tableBucketEvolution = new SchemaTableName(database, "presto_test_bucket_evolution");

    invalidClientId = "hive";
    invalidTableHandle = new HiveTableHandle(database, INVALID_TABLE);
    invalidTableLayoutHandle = new HiveTableLayoutHandle(
            invalidTable,
            "path",
            ImmutableList.of(),
            ImmutableList.of(),
            ImmutableMap.of(),
            ImmutableList.of(new HivePartition(invalidTable, "unknown", ImmutableMap.of())),
            TupleDomain.all(),
            TRUE_CONSTANT,
            ImmutableMap.of(),
            TupleDomain.all(),
            Optional.empty(),
            Optional.empty(),
            false,
            "layout",
            Optional.empty(),
            false);

    int partitionColumnIndex = MAX_PARTITION_KEY_COLUMN_INDEX;
    dsColumn = new HiveColumnHandle("ds", HIVE_STRING, parseTypeSignature(StandardTypes.VARCHAR), partitionColumnIndex--, PARTITION_KEY, Optional.empty(), Optional.empty());
    fileFormatColumn = new HiveColumnHandle("file_format", HIVE_STRING, parseTypeSignature(StandardTypes.VARCHAR), partitionColumnIndex--, PARTITION_KEY, Optional.empty(), Optional.empty());
    dummyColumn = new HiveColumnHandle("dummy", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), partitionColumnIndex--, PARTITION_KEY, Optional.empty(), Optional.empty());
    intColumn = new HiveColumnHandle("t_int", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), partitionColumnIndex--, PARTITION_KEY, Optional.empty(), Optional.empty());
    invalidColumnHandle = new HiveColumnHandle(INVALID_COLUMN, HIVE_STRING, parseTypeSignature(StandardTypes.VARCHAR), 0, REGULAR, Optional.empty(), Optional.empty());

    List<HiveColumnHandle> partitionColumns = ImmutableList.of(dsColumn, fileFormatColumn, dummyColumn);
    List<HivePartition> partitions = ImmutableList.<HivePartition>builder()
            .add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=textfile/dummy=1", ImmutableMap.<ColumnHandle, NullableValue>builder()
                    .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))
                    .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("textfile")))
                    .put(dummyColumn, NullableValue.of(INTEGER, 1L))
                    .build()))
            .add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=sequencefile/dummy=2", ImmutableMap.<ColumnHandle, NullableValue>builder()
                    .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))
                    .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("sequencefile")))
                    .put(dummyColumn, NullableValue.of(INTEGER, 2L))
                    .build()))
            .add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=rctext/dummy=3", ImmutableMap.<ColumnHandle, NullableValue>builder()
                    .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))
                    .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("rctext")))
                    .put(dummyColumn, NullableValue.of(INTEGER, 3L))
                    .build()))
            .add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=rcbinary/dummy=4", ImmutableMap.<ColumnHandle, NullableValue>builder()
                    .put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")))
                    .put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("rcbinary")))
                    .put(dummyColumn, NullableValue.of(INTEGER, 4L))
                    .build()))
            .build();
    partitionCount = partitions.size();

    tupleDomain = TupleDomain.fromFixedValues(ImmutableMap.of(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))));
    TupleDomain<Subfield> domainPredicate = tupleDomain
            .transform(HiveColumnHandle.class::cast)
            .transform(column -> new Subfield(column.getName(), ImmutableList.of()));

    tableLayout = new ConnectorTableLayout(
            new HiveTableLayoutHandle(
                    tablePartitionFormat,
                    "path",
                    partitionColumns,
                    ImmutableList.of(
                            new Column("t_string", HIVE_STRING, Optional.empty(), Optional.empty()),
                            new Column("t_tinyint", HIVE_BYTE, Optional.empty(), Optional.empty()),
                            new Column("t_smallint", HIVE_SHORT, Optional.empty(), Optional.empty()),
                            new Column("t_int", HIVE_INT, Optional.empty(), Optional.empty()),
                            new Column("t_bigint", HIVE_LONG, Optional.empty(), Optional.empty()),
                            new Column("t_float", HIVE_FLOAT, Optional.empty(), Optional.empty()),
                            new Column("t_double", HIVE_DOUBLE, Optional.empty(), Optional.empty()),
                            new Column("t_boolean", HIVE_BOOLEAN, Optional.empty(), Optional.empty())),
                    ImmutableMap.of(),
                    partitions,
                    domainPredicate,
                    TRUE_CONSTANT,
                    ImmutableMap.of(dsColumn.getName(), dsColumn),
                    tupleDomain,
                    Optional.empty(),
                    Optional.empty(),
                    false,
                    "layout",
                    Optional.empty(),
                    false),
            Optional.empty(),
            withColumnDomains(ImmutableMap.of(
                    dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                    fileFormatColumn, Domain.create(ValueSet.ofRanges(
                            Range.equal(createUnboundedVarcharType(), utf8Slice("textfile")),
                            Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile")),
                            Range.equal(createUnboundedVarcharType(), utf8Slice("rctext")),
                            Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false),
                    dummyColumn, Domain.create(ValueSet.ofRanges(
                            Range.equal(INTEGER, 1L),
                            Range.equal(INTEGER, 2L),
                            Range.equal(INTEGER, 3L),
                            Range.equal(INTEGER, 4L)), false))),
            Optional.empty(),
            Optional.empty(),
            Optional.of(new DiscretePredicates(ImmutableList.copyOf(partitionColumns), ImmutableList.of(
                    withColumnDomains(ImmutableMap.of(
                            dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                            fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("textfile"))), false),
                            dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 1L)), false))),
                    withColumnDomains(ImmutableMap.of(
                            dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                            fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile"))), false),
                            dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 2L)), false))),
                    withColumnDomains(ImmutableMap.of(
                            dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                            fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rctext"))), false),
                            dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 3L)), false))),
                    withColumnDomains(ImmutableMap.of(
                            dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false),
                            fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false),
                            dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 4L)), false)))))),
            ImmutableList.of());

    List<HivePartition> unpartitionedPartitions = ImmutableList.of(new HivePartition(tableUnpartitioned));
    unpartitionedTableLayout = new ConnectorTableLayout(new HiveTableLayoutHandle(
            tableUnpartitioned,
            "path",
            ImmutableList.of(),
            ImmutableList.of(
                    new Column("t_string", HIVE_STRING, Optional.empty(), Optional.empty()),
                    new Column("t_tinyint", HIVE_BYTE, Optional.empty(), Optional.empty())),
            ImmutableMap.of(),
            unpartitionedPartitions,
            TupleDomain.all(),
            TRUE_CONSTANT,
            ImmutableMap.of(),
            TupleDomain.all(),
            Optional.empty(),
            Optional.empty(),
            false,
            "layout",
            Optional.empty(),
            false));
    timeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(ZoneId.of(timeZoneId)));
}
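The expected DiscretePredicates above lists, per partition, exactly the domains that getTableLayout derives from HivePartition.getKeys(). A minimal sketch (hypothetical) of that correspondence for the first partition, assuming Domain.singleValue(type, value) is equivalent to the Domain.create(ValueSet.ofRanges(Range.equal(type, value)), false) form written out above:

// Built from the partition keys, as getTableLayout does ...
TupleDomain<ColumnHandle> fromKeys = TupleDomain.fromFixedValues(ImmutableMap.of(
        dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29")),
        fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("textfile")),
        dummyColumn, NullableValue.of(INTEGER, 1L)));

// ... this should equal the explicit single-value domains in the expected layout.
TupleDomain<ColumnHandle> explicit = withColumnDomains(ImmutableMap.of(
        dsColumn, Domain.singleValue(createUnboundedVarcharType(), utf8Slice("2012-12-29")),
        fileFormatColumn, Domain.singleValue(createUnboundedVarcharType(), utf8Slice("textfile")),
        dummyColumn, Domain.singleValue(INTEGER, 1L)));
assertEquals(fromKeys, explicit);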