Example use of com.facebook.presto.spi.ConnectorTablePartitioning in the project presto by prestodb.
From the class HiveMetadata, method getTableLayout.
/**
 * Assembles the {@link ConnectorTableLayout} for a Hive table layout handle.
 *
 * <p>The returned layout combines: discrete per-partition predicates (only when the
 * table has partition columns), bucket-based table partitioning (only when bucket
 * execution applies and the handle carries a bucket handle), the enforced predicate,
 * sort-order local properties, and optional stream partitioning columns.
 *
 * @param session the session used to build the metastore context and to read the
 *        bucket-execution and streaming-aggregation settings
 * @param layoutHandle the layout handle; cast to {@code HiveTableLayoutHandle}
 * @return the layout describing the table referenced by {@code layoutHandle}
 * @throws TableNotFoundException if the metastore lookup returns no table
 * @throws IllegalArgumentException if a non-virtually bucketed table has no bucket
 *         property, or its bucket function type is not handled here
 */
@Override
public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTableLayoutHandle layoutHandle) {
    HiveTableLayoutHandle hiveLayout = (HiveTableLayoutHandle) layoutHandle;
    List<ColumnHandle> partitionColumns = ImmutableList.copyOf(hiveLayout.getPartitionColumns());
    // The partition list is unwrapped unconditionally, so the handle must already carry it.
    List<HivePartition> partitions = hiveLayout.getPartitions().get();

    final Optional<DiscretePredicates> discretePredicates;
    if (partitionColumns.isEmpty()) {
        discretePredicates = Optional.empty();
    } else {
        // Keep this a transforming view rather than a materialized list: a table may
        // have a very large number of partitions, and the per-partition tuple domains
        // should not all be held in memory at once.
        Iterable<TupleDomain<ColumnHandle>> domainPerPartition = Iterables.transform(
                partitions,
                partition -> TupleDomain.fromFixedValues(partition.getKeys()));
        discretePredicates = Optional.of(new DiscretePredicates(partitionColumns, domainPerPartition));
    }

    SchemaTableName tableName = hiveLayout.getSchemaTableName();
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Table table = metastore.getTable(metastoreContext, tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));

    // Bucketing of temporary tables is always honored, regardless of the session
    // setting, because the engine explicitly asked for them to be created that way.
    boolean honorBucketing = table.getTableType().equals(TEMPORARY_TABLE) || isBucketExecutionEnabled(session);

    Optional<ConnectorTablePartitioning> tablePartitioning = Optional.empty();
    if (honorBucketing && hiveLayout.getBucketHandle().isPresent()) {
        HiveBucketHandle bucketHandle = hiveLayout.getBucketHandle().get();
        int readBucketCount = bucketHandle.getReadBucketCount();
        OptionalInt noMaxCompatibleBucketCount = OptionalInt.empty();

        HivePartitioningHandle partitioningHandle;
        if (bucketHandle.isVirtuallyBucketed()) {
            // A virtually bucketed table has no bucket property on the table itself,
            // so the handle is derived from the bucket handle's column types.
            partitioningHandle = createHiveCompatiblePartitioningHandle(
                    readBucketCount,
                    bucketHandle.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()),
                    noMaxCompatibleBucketCount);
        } else {
            HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty()
                    .orElseThrow(() -> new IllegalArgumentException("bucketProperty is expected to be present"));
            switch (bucketProperty.getBucketFunctionType()) {
                case HIVE_COMPATIBLE:
                    partitioningHandle = createHiveCompatiblePartitioningHandle(
                            readBucketCount,
                            bucketHandle.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()),
                            noMaxCompatibleBucketCount);
                    break;
                case PRESTO_NATIVE:
                    partitioningHandle = createPrestoNativePartitioningHandle(
                            readBucketCount,
                            bucketProperty.getTypes().get(),
                            noMaxCompatibleBucketCount);
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported bucket function type " + bucketProperty.getBucketFunctionType());
            }
        }

        List<ColumnHandle> bucketColumns = bucketHandle.getColumns().stream()
                .map(ColumnHandle.class::cast)
                .collect(toImmutableList());
        tablePartitioning = Optional.of(new ConnectorTablePartitioning(partitioningHandle, bucketColumns));
    }

    TupleDomain<ColumnHandle> predicate;
    if (!hiveLayout.isPushdownFilterEnabled()) {
        predicate = createPredicate(partitionColumns, partitions);
    } else {
        // Re-key the domain predicate from subfields to column handles, then intersect
        // it with the partition predicate. Subfields that do not cover an entire column
        // are mapped to null (presumably so that transform discards them; confirm
        // against TupleDomain.transform).
        predicate = hiveLayout.getDomainPredicate()
                .transform(subfield -> isEntireColumn(subfield) ? subfield.getRootName() : null)
                .transform(hiveLayout.getPredicateColumns()::get)
                .transform(ColumnHandle.class::cast)
                .intersect(createPredicate(partitionColumns, partitions));
    }

    // Advertise the table's sort order, if it declares one.
    ImmutableList.Builder<LocalProperty<ColumnHandle>> localProperties = ImmutableList.builder();
    Optional<Set<ColumnHandle>> streamPartitionColumns = Optional.empty();
    Optional<HiveBucketProperty> storageBucketProperty = table.getStorage().getBucketProperty();
    if (storageBucketProperty.isPresent() && !storageBucketProperty.get().getSortedBy().isEmpty()) {
        HiveBucketProperty bucketProperty = storageBucketProperty.get();

        // Stream partition columns describe how data is divided across splits;
        // local properties describe how data is organized within one split.
        ImmutableSet.Builder<ColumnHandle> streamColumns = ImmutableSet.builder();

        // Partition columns go in first ...
        streamColumns.addAll(partitionColumns);

        // ... followed by every sort column, which is also recorded as a sorting property.
        Map<String, ColumnHandle> handlesByName = hiveColumnHandles(table).stream()
                .collect(toImmutableMap(HiveColumnHandle::getName, identity()));
        for (SortingColumn sortingColumn : bucketProperty.getSortedBy()) {
            ColumnHandle sortHandle = handlesByName.get(sortingColumn.getColumnName());
            localProperties.add(new SortingProperty<>(sortHandle, sortingColumn.getOrder().getSortOrder()));
            streamColumns.add(sortHandle);
        }

        // Stream partition columns are only exposed when they make streaming aggregation
        // possible and the session allows it:
        //   - the leading sort columns (as many as there are bucket columns) must all be
        //     bucket columns, and
        //   - streaming aggregation must be enabled; file splitting is disabled in that
        //     mode so that all rows of one value group land in the same split.
        List<String> sortColumnNames = bucketProperty.getSortedBy().stream()
                .map(SortingColumn::getColumnName)
                .collect(toImmutableList());
        int bucketColumnCount = bucketProperty.getBucketedBy().size();
        boolean sortPrefixCoveredByBucketColumns = bucketColumnCount <= sortColumnNames.size()
                && bucketProperty.getBucketedBy().containsAll(sortColumnNames.subList(0, bucketColumnCount));
        if (sortPrefixCoveredByBucketColumns && isStreamingAggregationEnabled(session)) {
            streamPartitionColumns = Optional.of(streamColumns.build());
        }
    }

    return new ConnectorTableLayout(
            hiveLayout,
            Optional.empty(),
            predicate,
            tablePartitioning,
            streamPartitionColumns,
            discretePredicates,
            localProperties.build(),
            Optional.of(hiveLayout.getRemainingPredicate()));
}
Example use of com.facebook.presto.spi.ConnectorTablePartitioning in the project presto by prestodb.
From the class TpchMetadata, method getTableLayouts.
/**
 * Produces the single table layout offered for a TPCH table.
 *
 * <p>Depending on the table name and on the {@code partitioningEnabled} and
 * {@code predicatePushdownEnabled} flags, the layout may carry table partitioning and
 * sort properties on the order key (ORDERS and LINE_ITEM) and an enforced predicate
 * (ORDERS on order status; PART on container and type). Columns whose predicate is
 * enforced are removed from the unenforced constraint returned with the layout.
 *
 * @param session the session used to resolve the table's column handles
 * @param table the table handle; cast to {@code TpchTableHandle}
 * @param constraint the constraint whose summary is the starting unenforced constraint
 * @param desiredColumns not consulted by this implementation
 * @return a one-element list holding the layout and its unenforced constraint
 */
@Override
public List<ConnectorTableLayoutResult> getTableLayouts(ConnectorSession session, ConnectorTableHandle table, Constraint<ColumnHandle> constraint, Optional<Set<ColumnHandle>> desiredColumns) {
    TpchTableHandle tpchHandle = (TpchTableHandle) table;

    // Defaults for a table with no partitioning, no ordering and nothing pushed down.
    Optional<ConnectorTablePartitioning> partitioning = Optional.empty();
    Optional<Set<ColumnHandle>> streamPartitioningColumns = Optional.empty();
    List<LocalProperty<ColumnHandle>> sortProperties = ImmutableList.of();
    TupleDomain<ColumnHandle> enforcedPredicate = TupleDomain.all();
    TupleDomain<ColumnHandle> unenforcedConstraint = constraint.getSummary();

    Map<String, ColumnHandle> columnsByName = getColumnHandles(session, tpchHandle);
    String tableName = tpchHandle.getTableName();

    if (tableName.equals(TpchTable.ORDERS.getTableName())) {
        if (partitioningEnabled) {
            ColumnHandle orderKey = columnsByName.get(columnNaming.getName(OrderColumn.ORDER_KEY));
            partitioning = Optional.of(new ConnectorTablePartitioning(
                    new TpchPartitioningHandle(
                            TpchTable.ORDERS.getTableName(),
                            calculateTotalRows(OrderGenerator.SCALE_BASE, tpchHandle.getScaleFactor())),
                    ImmutableList.of(orderKey)));
            streamPartitioningColumns = Optional.of(ImmutableSet.of(orderKey));
            sortProperties = ImmutableList.of(new SortingProperty<>(orderKey, SortOrder.ASC_NULLS_FIRST));
        }
        if (predicatePushdownEnabled) {
            // The order-status predicate is enforced by the layout, so drop that
            // column from what the engine still has to check.
            enforcedPredicate = toTupleDomain(ImmutableMap.of(
                    toColumnHandle(OrderColumn.ORDER_STATUS),
                    filterValues(ORDER_STATUS_NULLABLE_VALUES, OrderColumn.ORDER_STATUS, constraint)));
            unenforcedConstraint = filterOutColumnFromPredicate(
                    constraint.getSummary(),
                    toColumnHandle(OrderColumn.ORDER_STATUS));
        }
    } else if (predicatePushdownEnabled && tableName.equals(TpchTable.PART.getTableName())) {
        // Both the container and type predicates are enforced; remove both columns
        // from the unenforced constraint, one after the other.
        enforcedPredicate = toTupleDomain(ImmutableMap.of(
                toColumnHandle(PartColumn.CONTAINER),
                filterValues(PART_CONTAINER_NULLABLE_VALUES, PartColumn.CONTAINER, constraint),
                toColumnHandle(PartColumn.TYPE),
                filterValues(PART_TYPE_NULLABLE_VALUES, PartColumn.TYPE, constraint)));
        TupleDomain<ColumnHandle> withoutContainer = filterOutColumnFromPredicate(
                constraint.getSummary(),
                toColumnHandle(PartColumn.CONTAINER));
        unenforcedConstraint = filterOutColumnFromPredicate(withoutContainer, toColumnHandle(PartColumn.TYPE));
    } else if (tableName.equals(TpchTable.LINE_ITEM.getTableName())) {
        if (partitioningEnabled) {
            ColumnHandle orderKey = columnsByName.get(columnNaming.getName(LineItemColumn.ORDER_KEY));
            ColumnHandle lineNumber = columnsByName.get(columnNaming.getName(LineItemColumn.LINE_NUMBER));
            // NOTE(review): the partitioning handle uses the ORDERS table name and the
            // ORDERS row count, exactly as the ORDERS branch does - presumably so both
            // tables share one partitioning on order key; confirm before changing.
            partitioning = Optional.of(new ConnectorTablePartitioning(
                    new TpchPartitioningHandle(
                            TpchTable.ORDERS.getTableName(),
                            calculateTotalRows(OrderGenerator.SCALE_BASE, tpchHandle.getScaleFactor())),
                    ImmutableList.of(orderKey)));
            streamPartitioningColumns = Optional.of(ImmutableSet.of(orderKey));
            sortProperties = ImmutableList.of(
                    new SortingProperty<>(orderKey, SortOrder.ASC_NULLS_FIRST),
                    new SortingProperty<>(lineNumber, SortOrder.ASC_NULLS_FIRST));
        }
    }

    ConnectorTableLayout layout = new ConnectorTableLayout(
            new TpchTableLayoutHandle(tpchHandle, enforcedPredicate),
            Optional.empty(), // TODO: conditionally return well-known properties (e.g., orderkey > 0, etc)
            enforcedPredicate,
            partitioning,
            streamPartitioningColumns,
            Optional.empty(),
            sortProperties);
    return ImmutableList.of(new ConnectorTableLayoutResult(layout, unenforcedConstraint));
}
Aggregations