Search in sources :

Example 1 with ConnectorTablePartitioning

Use of com.facebook.presto.spi.ConnectorTablePartitioning in project presto by prestodb.

From the class HiveMetadata, method getTableLayout.

/**
 * Builds the {@link ConnectorTableLayout} described by a Hive table layout handle.
 *
 * <p>The resulting layout carries:
 * <ul>
 * <li>discrete predicates, one tuple domain per partition, when the layout has partition columns;</li>
 * <li>a bucket-based table partitioning, when bucket execution applies and the layout has a bucket handle;</li>
 * <li>the predicate built from the partitions, intersected with the whole-column part of the domain
 * predicate when filter pushdown is enabled on the layout;</li>
 * <li>sorting properties and, when eligible, stream partitioning columns derived from the table's
 * bucket property.</li>
 * </ul>
 *
 * @param session session used to build the metastore context and to read the bucket-execution and
 * streaming-aggregation toggles
 * @param layoutHandle layout handle; cast to {@code HiveTableLayoutHandle}
 * @return the table layout for the given handle
 * @throws TableNotFoundException if the metastore returns no table for the layout's schema/table name
 */
@Override
public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTableLayoutHandle layoutHandle) {
    HiveTableLayoutHandle hiveLayoutHandle = (HiveTableLayoutHandle) layoutHandle;
    List<ColumnHandle> partitionKeyColumns = ImmutableList.copyOf(hiveLayoutHandle.getPartitionColumns());
    List<HivePartition> hivePartitions = hiveLayoutHandle.getPartitions().get();

    Optional<DiscretePredicates> discretePredicates;
    if (partitionKeyColumns.isEmpty()) {
        discretePredicates = Optional.empty();
    } else {
        // Keep this a lazy view: the table may have a huge number of partitions, and
        // materializing a tuple domain for each of them up front would hold them all in memory.
        Iterable<TupleDomain<ColumnHandle>> lazyPartitionDomains = Iterables.transform(
                hivePartitions,
                partition -> TupleDomain.fromFixedValues(partition.getKeys()));
        discretePredicates = Optional.of(new DiscretePredicates(partitionKeyColumns, lazyPartitionDomains));
    }

    Optional<ConnectorTablePartitioning> tablePartitioning = Optional.empty();
    SchemaTableName schemaTableName = hiveLayoutHandle.getSchemaTableName();
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Table table = metastore
            .getTable(metastoreContext, schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));

    // Bucketing of temporary tables is always honored: the engine explicitly asked for it
    // when it created them, regardless of the session setting.
    boolean honorBucketing = table.getTableType().equals(TEMPORARY_TABLE) || isBucketExecutionEnabled(session);
    if (honorBucketing) {
        Optional<HiveBucketHandle> layoutBucketHandle = hiveLayoutHandle.getBucketHandle();
        if (layoutBucketHandle.isPresent()) {
            HiveBucketHandle bucketHandle = layoutBucketHandle.get();
            int readBucketCount = bucketHandle.getReadBucketCount();
            OptionalInt noMaxCompatibleBucketCount = OptionalInt.empty();

            HivePartitioningHandle partitioningHandle;
            if (!bucketHandle.isVirtuallyBucketed()) {
                HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty()
                        .orElseThrow(() -> new IllegalArgumentException("bucketProperty is expected to be present"));
                switch (bucketProperty.getBucketFunctionType()) {
                    case HIVE_COMPATIBLE:
                        partitioningHandle = createHiveCompatiblePartitioningHandle(
                                readBucketCount,
                                bucketHandle.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()),
                                noMaxCompatibleBucketCount);
                        break;
                    case PRESTO_NATIVE:
                        partitioningHandle = createPrestoNativePartitioningHandle(
                                readBucketCount,
                                bucketProperty.getTypes().get(),
                                noMaxCompatibleBucketCount);
                        break;
                    default:
                        throw new IllegalArgumentException("Unsupported bucket function type " + bucketProperty.getBucketFunctionType());
                }
            } else {
                // A virtually bucketed table has no bucket property on the table itself,
                // so the Hive-compatible handle is derived from the bucket handle's columns.
                partitioningHandle = createHiveCompatiblePartitioningHandle(
                        readBucketCount,
                        bucketHandle.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()),
                        noMaxCompatibleBucketCount);
            }

            List<ColumnHandle> bucketColumns = bucketHandle.getColumns().stream()
                    .map(ColumnHandle.class::cast)
                    .collect(toImmutableList());
            tablePartitioning = Optional.of(new ConnectorTablePartitioning(partitioningHandle, bucketColumns));
        }
    }

    TupleDomain<ColumnHandle> predicate;
    if (!hiveLayoutHandle.isPushdownFilterEnabled()) {
        predicate = createPredicate(partitionKeyColumns, hivePartitions);
    } else {
        // Keep only the domains that cover an entire column (others map to null), then
        // translate the column names into the layout's predicate column handles.
        TupleDomain<ColumnHandle> wholeColumnDomains = hiveLayoutHandle.getDomainPredicate()
                .transform(subfield -> isEntireColumn(subfield) ? subfield.getRootName() : null)
                .transform(hiveLayoutHandle.getPredicateColumns()::get)
                .transform(ColumnHandle.class::cast);
        predicate = wholeColumnDomains.intersect(createPredicate(partitionKeyColumns, hivePartitions));
    }

    // Expose the ordering property of the table.
    ImmutableList.Builder<LocalProperty<ColumnHandle>> localProperties = ImmutableList.builder();
    Optional<Set<ColumnHandle>> streamPartitionColumns = Optional.empty();
    Optional<HiveBucketProperty> storedBucketProperty = table.getStorage().getBucketProperty();
    if (storedBucketProperty.isPresent() && !storedBucketProperty.get().getSortedBy().isEmpty()) {
        // Stream partitioning columns describe how data is partitioned across splits;
        // local properties describe how data is organized within a split.
        ImmutableSet.Builder<ColumnHandle> streamColumns = ImmutableSet.builder();

        // Step 1: the partition columns contribute to the stream partitioning columns.
        streamColumns.addAll(partitionKeyColumns);

        // Step 2: every sorted column contributes to both the stream partitioning columns
        // and the local (sorting) properties.
        HiveBucketProperty sortedBucketProperty = storedBucketProperty.get();
        Map<String, ColumnHandle> columnHandlesByName = hiveColumnHandles(table).stream().collect(toImmutableMap(HiveColumnHandle::getName, identity()));
        for (SortingColumn sortingColumn : sortedBucketProperty.getSortedBy()) {
            ColumnHandle sortedColumn = columnHandlesByName.get(sortingColumn.getColumnName());
            localProperties.add(new SortingProperty<>(sortedColumn, sortingColumn.getOrder().getSortOrder()));
            streamColumns.add(sortedColumn);
        }

        // Stream partitioning columns are only reported when they enable streaming aggregation
        // and the table is eligible for it:
        //   1. the bucket columns are the same as the prefix of the sort columns, and
        //   2. all rows of the same value group are guaranteed to be in the same split. File
        //      splitting is disabled when isStreamingAggregationEnabled is true to guarantee this.
        List<String> sortColumnNames = sortedBucketProperty.getSortedBy().stream()
                .map(SortingColumn::getColumnName)
                .collect(toImmutableList());
        int bucketColumnCount = sortedBucketProperty.getBucketedBy().size();
        boolean bucketColumnsAreSortPrefix = bucketColumnCount <= sortColumnNames.size()
                && sortedBucketProperty.getBucketedBy().containsAll(sortColumnNames.subList(0, bucketColumnCount));
        if (bucketColumnsAreSortPrefix && isStreamingAggregationEnabled(session)) {
            streamPartitionColumns = Optional.of(streamColumns.build());
        }
    }

    return new ConnectorTableLayout(
            hiveLayoutHandle,
            Optional.empty(),
            predicate,
            tablePartitioning,
            streamPartitionColumns,
            discretePredicates,
            localProperties.build(),
            Optional.of(hiveLayoutHandle.getRemainingPredicate()));
}
Also used : ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) InMemoryRecordSet(com.facebook.presto.spi.InMemoryRecordSet) HashSet(java.util.HashSet) Set(java.util.Set) Collectors.toSet(java.util.stream.Collectors.toSet) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorTablePartitioning(com.facebook.presto.spi.ConnectorTablePartitioning) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ColumnHandle(com.facebook.presto.spi.ColumnHandle) SystemTable(com.facebook.presto.spi.SystemTable) DwrfTableEncryptionProperties.forTable(com.facebook.presto.hive.DwrfTableEncryptionProperties.forTable) HiveUtil.translateHiveUnsupportedTypeForTemporaryTable(com.facebook.presto.hive.HiveUtil.translateHiveUnsupportedTypeForTemporaryTable) Table(com.facebook.presto.hive.metastore.Table) HiveUtil.translateHiveUnsupportedTypesForTemporaryTable(com.facebook.presto.hive.HiveUtil.translateHiveUnsupportedTypesForTemporaryTable) HiveTableProperties.getEncryptTable(com.facebook.presto.hive.HiveTableProperties.getEncryptTable) HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable(com.facebook.presto.hive.HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable) HiveTableProperties.isExternalTable(com.facebook.presto.hive.HiveTableProperties.isExternalTable) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) DiscretePredicates(com.facebook.presto.spi.DiscretePredicates) OptionalInt(java.util.OptionalInt) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Constraint(com.facebook.presto.spi.Constraint) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) 
ConnectorTableLayout(com.facebook.presto.spi.ConnectorTableLayout) HiveBucketing.getHiveBucketHandle(com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle) LocalProperty(com.facebook.presto.spi.LocalProperty)

Example 2 with ConnectorTablePartitioning

Use of com.facebook.presto.spi.ConnectorTablePartitioning in project presto by prestodb.

From the class TpchMetadata, method getTableLayouts.

/**
 * Returns the single table layout available for a TPCH table.
 *
 * <p>For the ORDERS and LINE_ITEM tables, when {@code partitioningEnabled} is set, the layout
 * reports partitioning on the order key column together with sorting properties. For the ORDERS
 * and PART tables, when {@code predicatePushdownEnabled} is set, the layout's predicate is built
 * from the constraint's values for the order status column (ORDERS) or the container and type
 * columns (PART), and those columns are filtered out of the unenforced constraint. Any other
 * table gets an unrestricted predicate and the constraint summary is returned as unenforced.
 *
 * @param session session used to resolve the table's column handles
 * @param table table handle; cast to {@code TpchTableHandle}
 * @param constraint constraint whose summary seeds the unenforced constraint
 * @param desiredColumns not read by this method
 * @return a one-element list holding the layout and the unenforced constraint
 */
@Override
public List<ConnectorTableLayoutResult> getTableLayouts(ConnectorSession session, ConnectorTableHandle table, Constraint<ColumnHandle> constraint, Optional<Set<ColumnHandle>> desiredColumns) {
    TpchTableHandle tpchHandle = (TpchTableHandle) table;

    // Defaults used for tables without any special handling below.
    Optional<ConnectorTablePartitioning> tablePartitioning = Optional.empty();
    Optional<Set<ColumnHandle>> partitioningColumns = Optional.empty();
    List<LocalProperty<ColumnHandle>> localProperties = ImmutableList.of();
    TupleDomain<ColumnHandle> enforcedPredicate = TupleDomain.all();
    TupleDomain<ColumnHandle> unenforcedConstraint = constraint.getSummary();

    Map<String, ColumnHandle> columnsByName = getColumnHandles(session, tpchHandle);

    if (tpchHandle.getTableName().equals(TpchTable.ORDERS.getTableName())) {
        if (partitioningEnabled) {
            ColumnHandle orderKey = columnsByName.get(columnNaming.getName(OrderColumn.ORDER_KEY));
            TpchPartitioningHandle ordersPartitioning = new TpchPartitioningHandle(
                    TpchTable.ORDERS.getTableName(),
                    calculateTotalRows(OrderGenerator.SCALE_BASE, tpchHandle.getScaleFactor()));
            tablePartitioning = Optional.of(new ConnectorTablePartitioning(ordersPartitioning, ImmutableList.of(orderKey)));
            partitioningColumns = Optional.of(ImmutableSet.of(orderKey));
            localProperties = ImmutableList.of(new SortingProperty<>(orderKey, SortOrder.ASC_NULLS_FIRST));
        }
        if (predicatePushdownEnabled) {
            enforcedPredicate = toTupleDomain(ImmutableMap.of(
                    toColumnHandle(OrderColumn.ORDER_STATUS),
                    filterValues(ORDER_STATUS_NULLABLE_VALUES, OrderColumn.ORDER_STATUS, constraint)));
            unenforcedConstraint = filterOutColumnFromPredicate(constraint.getSummary(), toColumnHandle(OrderColumn.ORDER_STATUS));
        }
    } else if (predicatePushdownEnabled && tpchHandle.getTableName().equals(TpchTable.PART.getTableName())) {
        enforcedPredicate = toTupleDomain(ImmutableMap.of(
                toColumnHandle(PartColumn.CONTAINER),
                filterValues(PART_CONTAINER_NULLABLE_VALUES, PartColumn.CONTAINER, constraint),
                toColumnHandle(PartColumn.TYPE),
                filterValues(PART_TYPE_NULLABLE_VALUES, PartColumn.TYPE, constraint)));
        // Both pushed-down columns are removed from the unenforced constraint, one at a time.
        TupleDomain<ColumnHandle> withoutContainer = filterOutColumnFromPredicate(constraint.getSummary(), toColumnHandle(PartColumn.CONTAINER));
        unenforcedConstraint = filterOutColumnFromPredicate(withoutContainer, toColumnHandle(PartColumn.TYPE));
    } else if (tpchHandle.getTableName().equals(TpchTable.LINE_ITEM.getTableName())) {
        if (partitioningEnabled) {
            ColumnHandle orderKey = columnsByName.get(columnNaming.getName(LineItemColumn.ORDER_KEY));
            // The partitioning handle is built from the ORDERS table name and the order generator's
            // scale base, exactly as for ORDERS above (presumably so both tables share the same
            // partitioning on order key; confirm against TpchPartitioningHandle).
            TpchPartitioningHandle ordersPartitioning = new TpchPartitioningHandle(
                    TpchTable.ORDERS.getTableName(),
                    calculateTotalRows(OrderGenerator.SCALE_BASE, tpchHandle.getScaleFactor()));
            tablePartitioning = Optional.of(new ConnectorTablePartitioning(ordersPartitioning, ImmutableList.of(orderKey)));
            partitioningColumns = Optional.of(ImmutableSet.of(orderKey));
            localProperties = ImmutableList.of(
                    new SortingProperty<>(orderKey, SortOrder.ASC_NULLS_FIRST),
                    new SortingProperty<>(columnsByName.get(columnNaming.getName(LineItemColumn.LINE_NUMBER)), SortOrder.ASC_NULLS_FIRST));
        }
    }

    TpchTableLayoutHandle tpchLayoutHandle = new TpchTableLayoutHandle(tpchHandle, enforcedPredicate);
    // TODO: conditionally return well-known properties (e.g., orderkey > 0, etc)
    ConnectorTableLayout tableLayout = new ConnectorTableLayout(
            tpchLayoutHandle,
            Optional.empty(),
            enforcedPredicate,
            tablePartitioning,
            partitioningColumns,
            Optional.empty(),
            localProperties);
    return ImmutableList.of(new ConnectorTableLayoutResult(tableLayout, unenforcedConstraint));
}
Also used : ColumnHandle(com.facebook.presto.spi.ColumnHandle) Collectors.toSet(java.util.stream.Collectors.toSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ConnectorTablePartitioning(com.facebook.presto.spi.ConnectorTablePartitioning) SortingProperty(com.facebook.presto.spi.SortingProperty) ConnectorTableLayoutResult(com.facebook.presto.spi.ConnectorTableLayoutResult) ConnectorTableLayout(com.facebook.presto.spi.ConnectorTableLayout) LocalProperty(com.facebook.presto.spi.LocalProperty)

Aggregations

ColumnHandle (com.facebook.presto.spi.ColumnHandle)2 ConnectorTableLayout (com.facebook.presto.spi.ConnectorTableLayout)2 ConnectorTablePartitioning (com.facebook.presto.spi.ConnectorTablePartitioning)2 LocalProperty (com.facebook.presto.spi.LocalProperty)2 ImmutableSet (com.google.common.collect.ImmutableSet)2 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)2 Set (java.util.Set)2 Collectors.toSet (java.util.stream.Collectors.toSet)2 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)1 DwrfTableEncryptionProperties.forTable (com.facebook.presto.hive.DwrfTableEncryptionProperties.forTable)1 HiveBucketing.getHiveBucketHandle (com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle)1 HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable (com.facebook.presto.hive.HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable)1 HiveTableProperties.getEncryptTable (com.facebook.presto.hive.HiveTableProperties.getEncryptTable)1 HiveTableProperties.isExternalTable (com.facebook.presto.hive.HiveTableProperties.isExternalTable)1 HiveUtil.translateHiveUnsupportedTypeForTemporaryTable (com.facebook.presto.hive.HiveUtil.translateHiveUnsupportedTypeForTemporaryTable)1 HiveUtil.translateHiveUnsupportedTypesForTemporaryTable (com.facebook.presto.hive.HiveUtil.translateHiveUnsupportedTypesForTemporaryTable)1 MetastoreContext (com.facebook.presto.hive.metastore.MetastoreContext)1 Table (com.facebook.presto.hive.metastore.Table)1 ConnectorTableLayoutResult (com.facebook.presto.spi.ConnectorTableLayoutResult)1 Constraint (com.facebook.presto.spi.Constraint)1