Search in sources :

Example 1 with DiscretePredicates

use of com.facebook.presto.spi.DiscretePredicates in project presto by prestodb.

the class AbstractTestHiveClient method assertExpectedTableLayout.

/**
 * Asserts that two table layouts match: handle, predicate, discrete predicates,
 * stream partitioning columns and local properties.
 *
 * <p>Discrete predicates must be present on both layouts or on neither. When present,
 * their columns are compared with {@code assertEquals} and their predicates with
 * {@code assertEqualsIgnoreOrder}.
 *
 * @param actualTableLayout the layout produced by the code under test
 * @param expectedTableLayout the layout it is expected to match
 */
protected void assertExpectedTableLayout(ConnectorTableLayout actualTableLayout, ConnectorTableLayout expectedTableLayout) {
    assertExpectedTableLayoutHandle(actualTableLayout.getHandle(), expectedTableLayout.getHandle());
    assertEquals(actualTableLayout.getPredicate(), expectedTableLayout.getPredicate());

    Optional<DiscretePredicates> actualDiscrete = actualTableLayout.getDiscretePredicates();
    Optional<DiscretePredicates> expectedDiscrete = expectedTableLayout.getDiscretePredicates();
    assertEquals(actualDiscrete.isPresent(), expectedDiscrete.isPresent());
    if (actualDiscrete.isPresent()) {
        // Presence of the expected side was just asserted to match, so get() is safe here.
        DiscretePredicates actual = actualDiscrete.get();
        DiscretePredicates expected = expectedDiscrete.get();
        assertEquals(actual.getColumns(), expected.getColumns());
        assertEqualsIgnoreOrder(actual.getPredicates(), expected.getPredicates());
    }

    assertEquals(actualTableLayout.getStreamPartitioningColumns(), expectedTableLayout.getStreamPartitioningColumns());
    assertEquals(actualTableLayout.getLocalProperties(), expectedTableLayout.getLocalProperties());
}
Also used : DiscretePredicates(com.facebook.presto.spi.DiscretePredicates)

Example 2 with DiscretePredicates

use of com.facebook.presto.spi.DiscretePredicates in project presto by prestodb.

the class HiveMetadata method getTableLayout.

/**
 * Builds the {@link ConnectorTableLayout} described by a Hive table layout handle.
 *
 * <p>The result combines per-partition discrete predicates (only when the table has
 * partition columns), bucket-based table partitioning (only when bucket execution applies
 * and the handle carries a bucket handle), the layout predicate, and the sort-order local
 * properties / stream partitioning columns of a table whose bucket property declares
 * sorted columns.
 *
 * @param session session used to resolve the metastore context and session-level toggles
 * @param layoutHandle the layout handle; it is cast to {@link HiveTableLayoutHandle}
 * @return the assembled table layout
 * @throws TableNotFoundException if the metastore returns no table for the handle's name
 */
@Override
public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTableLayoutHandle layoutHandle) {
    HiveTableLayoutHandle hiveLayout = (HiveTableLayoutHandle) layoutHandle;
    List<ColumnHandle> partitionColumns = ImmutableList.copyOf(hiveLayout.getPartitionColumns());
    List<HivePartition> partitions = hiveLayout.getPartitions().get();

    Optional<DiscretePredicates> discretePredicates;
    if (partitionColumns.isEmpty()) {
        discretePredicates = Optional.empty();
    } else {
        // Keep this a lazy view: there can be a huge number of partitions, and a lazily
        // transformed iterable avoids holding every partition's tuple domain in memory at once.
        Iterable<TupleDomain<ColumnHandle>> lazyPartitionDomains = Iterables.transform(
                partitions,
                partition -> TupleDomain.fromFixedValues(partition.getKeys()));
        discretePredicates = Optional.of(new DiscretePredicates(partitionColumns, lazyPartitionDomains));
    }

    Optional<ConnectorTablePartitioning> tablePartitioning = Optional.empty();
    SchemaTableName tableName = hiveLayout.getSchemaTableName();
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Table table = metastore.getTable(metastoreContext, tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));

    // Temporary tables are bucketed on explicit engine request, so their bucketing is
    // always honored regardless of the session setting.
    boolean honorBucketing = table.getTableType().equals(TEMPORARY_TABLE) || isBucketExecutionEnabled(session);
    if (honorBucketing && hiveLayout.getBucketHandle().isPresent()) {
        HiveBucketHandle bucketHandle = hiveLayout.getBucketHandle().get();
        HivePartitioningHandle partitioningHandle;
        int readBucketCount = bucketHandle.getReadBucketCount();
        OptionalInt maxCompatibleBucketCount = OptionalInt.empty();
        if (!bucketHandle.isVirtuallyBucketed()) {
            HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty()
                    .orElseThrow(() -> new IllegalArgumentException("bucketProperty is expected to be present"));
            switch (bucketProperty.getBucketFunctionType()) {
                case HIVE_COMPATIBLE:
                    partitioningHandle = createHiveCompatiblePartitioningHandle(
                            readBucketCount,
                            bucketHandle.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()),
                            maxCompatibleBucketCount);
                    break;
                case PRESTO_NATIVE:
                    partitioningHandle = createPrestoNativePartitioningHandle(
                            readBucketCount,
                            bucketProperty.getTypes().get(),
                            maxCompatibleBucketCount);
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported bucket function type " + bucketProperty.getBucketFunctionType());
            }
        } else {
            // A virtually bucketed table has no bucket property on the table itself.
            partitioningHandle = createHiveCompatiblePartitioningHandle(
                    readBucketCount,
                    bucketHandle.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()),
                    maxCompatibleBucketCount);
        }
        List<ColumnHandle> bucketColumns = bucketHandle.getColumns().stream()
                .map(ColumnHandle.class::cast)
                .collect(toImmutableList());
        tablePartitioning = Optional.of(new ConnectorTablePartitioning(partitioningHandle, bucketColumns));
    }

    TupleDomain<ColumnHandle> predicate;
    if (!hiveLayout.isPushdownFilterEnabled()) {
        predicate = createPredicate(partitionColumns, partitions);
    } else {
        // With filter pushdown, narrow the partition predicate by the whole-column part of the
        // domain predicate, re-keyed from subfields to column handles.
        predicate = hiveLayout.getDomainPredicate()
                .transform(subfield -> isEntireColumn(subfield) ? subfield.getRootName() : null)
                .transform(hiveLayout.getPredicateColumns()::get)
                .transform(ColumnHandle.class::cast)
                .intersect(createPredicate(partitionColumns, partitions));
    }

    // Expose the ordering property of the table.
    ImmutableList.Builder<LocalProperty<ColumnHandle>> localProperties = ImmutableList.builder();
    Optional<Set<ColumnHandle>> streamPartitionColumns = Optional.empty();
    Optional<HiveBucketProperty> storageBucketProperty = table.getStorage().getBucketProperty();
    if (storageBucketProperty.isPresent() && !storageBucketProperty.get().getSortedBy().isEmpty()) {
        // streamPartitioningColumns: how data is partitioned across splits.
        // localProperties: how data is organized within a split.
        ImmutableSet.Builder<ColumnHandle> streamColumns = ImmutableSet.builder();

        // Step 1: partition columns go into the stream partitioning columns.
        for (ColumnHandle partitionColumn : partitionColumns) {
            streamColumns.add(partitionColumn);
        }

        // Step 2: sorted columns go into both the stream partitioning columns and the local properties.
        HiveBucketProperty bucketProperty = storageBucketProperty.get();
        Map<String, ColumnHandle> columnHandles = hiveColumnHandles(table).stream()
                .collect(toImmutableMap(HiveColumnHandle::getName, identity()));
        for (SortingColumn sortingColumn : bucketProperty.getSortedBy()) {
            ColumnHandle sortedHandle = columnHandles.get(sortingColumn.getColumnName());
            localProperties.add(new SortingProperty<>(sortedHandle, sortingColumn.getOrder().getSortOrder()));
            streamColumns.add(sortedHandle);
        }

        // streamPartitionColumns is only set when it enables streaming aggregation and the table is eligible:
        // 1. the bucket columns are the same as the prefix of the sort columns, and
        // 2. all rows of the same value group are guaranteed to be in the same split. File splitting is
        //    disabled when isStreamingAggregationEnabled is true to make sure the property is guaranteed.
        List<String> sortColumns = bucketProperty.getSortedBy().stream()
                .map(SortingColumn::getColumnName)
                .collect(toImmutableList());
        List<String> bucketedBy = bucketProperty.getBucketedBy();
        boolean bucketColumnsMatchSortPrefix = bucketedBy.size() <= sortColumns.size()
                && bucketedBy.containsAll(sortColumns.subList(0, bucketedBy.size()));
        if (bucketColumnsMatchSortPrefix && isStreamingAggregationEnabled(session)) {
            streamPartitionColumns = Optional.of(streamColumns.build());
        }
    }

    return new ConnectorTableLayout(
            hiveLayout,
            Optional.empty(),
            predicate,
            tablePartitioning,
            streamPartitionColumns,
            discretePredicates,
            localProperties.build(),
            Optional.of(hiveLayout.getRemainingPredicate()));
}
Also used : ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) InMemoryRecordSet(com.facebook.presto.spi.InMemoryRecordSet) HashSet(java.util.HashSet) Set(java.util.Set) Collectors.toSet(java.util.stream.Collectors.toSet) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorTablePartitioning(com.facebook.presto.spi.ConnectorTablePartitioning) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ColumnHandle(com.facebook.presto.spi.ColumnHandle) SystemTable(com.facebook.presto.spi.SystemTable) DwrfTableEncryptionProperties.forTable(com.facebook.presto.hive.DwrfTableEncryptionProperties.forTable) HiveUtil.translateHiveUnsupportedTypeForTemporaryTable(com.facebook.presto.hive.HiveUtil.translateHiveUnsupportedTypeForTemporaryTable) Table(com.facebook.presto.hive.metastore.Table) HiveUtil.translateHiveUnsupportedTypesForTemporaryTable(com.facebook.presto.hive.HiveUtil.translateHiveUnsupportedTypesForTemporaryTable) HiveTableProperties.getEncryptTable(com.facebook.presto.hive.HiveTableProperties.getEncryptTable) HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable(com.facebook.presto.hive.HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable) HiveTableProperties.isExternalTable(com.facebook.presto.hive.HiveTableProperties.isExternalTable) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) DiscretePredicates(com.facebook.presto.spi.DiscretePredicates) OptionalInt(java.util.OptionalInt) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Constraint(com.facebook.presto.spi.Constraint) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) 
ConnectorTableLayout(com.facebook.presto.spi.ConnectorTableLayout) HiveBucketing.getHiveBucketHandle(com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle) LocalProperty(com.facebook.presto.spi.LocalProperty)

Example 3 with DiscretePredicates

use of com.facebook.presto.spi.DiscretePredicates in project presto by prestodb.

the class AbstractTestHiveClient method setupHive.

/**
 * Initializes the fixture fields assigned below: the client and database ids, the names of
 * the test tables, deliberately invalid handles, the partition column handles, the expected
 * table layouts for the partitioned and unpartitioned test tables, and the time zone.
 *
 * @param connectorId stored as {@code clientId}
 * @param databaseName stored as {@code database} and used as the schema of every table name built here
 * @param timeZoneId zone id parsed with {@code ZoneId.of} and converted to the {@code timeZone} field
 */
protected void setupHive(String connectorId, String databaseName, String timeZoneId) {
    clientId = connectorId;
    database = databaseName;
    // Names of the tables referenced by the tests, all in the given database.
    tablePartitionFormat = new SchemaTableName(database, "presto_test_partition_format");
    tableUnpartitioned = new SchemaTableName(database, "presto_test_unpartitioned");
    tableOffline = new SchemaTableName(database, "presto_test_offline");
    tableOfflinePartition = new SchemaTableName(database, "presto_test_offline_partition");
    tableNotReadable = new SchemaTableName(database, "presto_test_not_readable");
    view = new SchemaTableName(database, "presto_test_view");
    invalidTable = new SchemaTableName(database, INVALID_TABLE);
    tableBucketedStringInt = new SchemaTableName(database, "presto_test_bucketed_by_string_int");
    tableBucketedBigintBoolean = new SchemaTableName(database, "presto_test_bucketed_by_bigint_boolean");
    tableBucketedDoubleFloat = new SchemaTableName(database, "presto_test_bucketed_by_double_float");
    tablePartitionSchemaChange = new SchemaTableName(database, "presto_test_partition_schema_change");
    tablePartitionSchemaChangeNonCanonical = new SchemaTableName(database, "presto_test_partition_schema_change_non_canonical");
    tableBucketEvolution = new SchemaTableName(database, "presto_test_bucket_evolution");
    // Invalid counterparts: a fixed client id plus a table handle and a layout handle built on INVALID_TABLE.
    invalidClientId = "hive";
    invalidTableHandle = new HiveTableHandle(database, INVALID_TABLE);
    invalidTableLayoutHandle = new HiveTableLayoutHandle(invalidTable, "path", ImmutableList.of(), ImmutableList.of(), ImmutableMap.of(), ImmutableList.of(new HivePartition(invalidTable, "unknown", ImmutableMap.of())), TupleDomain.all(), TRUE_CONSTANT, ImmutableMap.of(), TupleDomain.all(), Optional.empty(), Optional.empty(), false, "layout", Optional.empty(), false);
    // Each PARTITION_KEY handle below takes the current index and then decrements it, so the
    // indexes count down from MAX_PARTITION_KEY_COLUMN_INDEX in declaration order.
    int partitionColumnIndex = MAX_PARTITION_KEY_COLUMN_INDEX;
    dsColumn = new HiveColumnHandle("ds", HIVE_STRING, parseTypeSignature(StandardTypes.VARCHAR), partitionColumnIndex--, PARTITION_KEY, Optional.empty(), Optional.empty());
    fileFormatColumn = new HiveColumnHandle("file_format", HIVE_STRING, parseTypeSignature(StandardTypes.VARCHAR), partitionColumnIndex--, PARTITION_KEY, Optional.empty(), Optional.empty());
    dummyColumn = new HiveColumnHandle("dummy", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), partitionColumnIndex--, PARTITION_KEY, Optional.empty(), Optional.empty());
    // NOTE(review): intColumn is created as a PARTITION_KEY handle but is not part of the
    // partitionColumns list below — confirm this is intentional.
    intColumn = new HiveColumnHandle("t_int", HIVE_INT, parseTypeSignature(StandardTypes.INTEGER), partitionColumnIndex--, PARTITION_KEY, Optional.empty(), Optional.empty());
    invalidColumnHandle = new HiveColumnHandle(INVALID_COLUMN, HIVE_STRING, parseTypeSignature(StandardTypes.VARCHAR), 0, REGULAR, Optional.empty(), Optional.empty());
    List<HiveColumnHandle> partitionColumns = ImmutableList.of(dsColumn, fileFormatColumn, dummyColumn);
    // Four partitions of tablePartitionFormat: all share ds=2012-12-29 and differ in
    // file_format (textfile, sequencefile, rctext, rcbinary) and dummy (1 through 4).
    List<HivePartition> partitions = ImmutableList.<HivePartition>builder().add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=textfile/dummy=1", ImmutableMap.<ColumnHandle, NullableValue>builder().put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))).put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("textfile"))).put(dummyColumn, NullableValue.of(INTEGER, 1L)).build())).add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=sequencefile/dummy=2", ImmutableMap.<ColumnHandle, NullableValue>builder().put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))).put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("sequencefile"))).put(dummyColumn, NullableValue.of(INTEGER, 2L)).build())).add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=rctext/dummy=3", ImmutableMap.<ColumnHandle, NullableValue>builder().put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))).put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("rctext"))).put(dummyColumn, NullableValue.of(INTEGER, 3L)).build())).add(new HivePartition(tablePartitionFormat, "ds=2012-12-29/file_format=rcbinary/dummy=4", ImmutableMap.<ColumnHandle, NullableValue>builder().put(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))).put(fileFormatColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("rcbinary"))).put(dummyColumn, NullableValue.of(INTEGER, 4L)).build())).build();
    partitionCount = partitions.size();
    // Fixed-value domain ds = "2012-12-29", and the same domain re-keyed by a Subfield built
    // from the column name with an empty path.
    tupleDomain = TupleDomain.fromFixedValues(ImmutableMap.of(dsColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2012-12-29"))));
    TupleDomain<Subfield> domainPredicate = tupleDomain.transform(HiveColumnHandle.class::cast).transform(column -> new Subfield(column.getName(), ImmutableList.of()));
    // Expected layout for tablePartitionFormat. Its predicate lists, per partition column, every
    // value appearing in the four partitions; its DiscretePredicates hold one column-domain
    // entry per partition. (The statement below spans two physical lines.)
    tableLayout = new ConnectorTableLayout(new HiveTableLayoutHandle(tablePartitionFormat, "path", partitionColumns, ImmutableList.of(new Column("t_string", HIVE_STRING, Optional.empty(), Optional.empty()), new Column("t_tinyint", HIVE_BYTE, Optional.empty(), Optional.empty()), new Column("t_smallint", HIVE_SHORT, Optional.empty(), Optional.empty()), new Column("t_int", HIVE_INT, Optional.empty(), Optional.empty()), new Column("t_bigint", HIVE_LONG, Optional.empty(), Optional.empty()), new Column("t_float", HIVE_FLOAT, Optional.empty(), Optional.empty()), new Column("t_double", HIVE_DOUBLE, Optional.empty(), Optional.empty()), new Column("t_boolean", HIVE_BOOLEAN, Optional.empty(), Optional.empty())), ImmutableMap.of(), partitions, domainPredicate, TRUE_CONSTANT, ImmutableMap.of(dsColumn.getName(), dsColumn), tupleDomain, Optional.empty(), Optional.empty(), false, "layout", Optional.empty(), false), Optional.empty(), withColumnDomains(ImmutableMap.of(dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("textfile")), Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile")), Range.equal(createUnboundedVarcharType(), utf8Slice("rctext")), Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false), dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 1L), Range.equal(INTEGER, 2L), Range.equal(INTEGER, 3L), Range.equal(INTEGER, 4L)), false))), Optional.empty(), Optional.empty(), Optional.of(new DiscretePredicates(ImmutableList.copyOf(partitionColumns), ImmutableList.of(withColumnDomains(ImmutableMap.of(dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("textfile"))), false), dummyColumn, 
Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 1L)), false))), withColumnDomains(ImmutableMap.of(dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("sequencefile"))), false), dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 2L)), false))), withColumnDomains(ImmutableMap.of(dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rctext"))), false), dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 3L)), false))), withColumnDomains(ImmutableMap.of(dsColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("2012-12-29"))), false), fileFormatColumn, Domain.create(ValueSet.ofRanges(Range.equal(createUnboundedVarcharType(), utf8Slice("rcbinary"))), false), dummyColumn, Domain.create(ValueSet.ofRanges(Range.equal(INTEGER, 4L)), false)))))), ImmutableList.of());
    // Expected layout for the unpartitioned table: no partition columns and a single
    // HivePartition constructed from the table name alone.
    List<HivePartition> unpartitionedPartitions = ImmutableList.of(new HivePartition(tableUnpartitioned));
    unpartitionedTableLayout = new ConnectorTableLayout(new HiveTableLayoutHandle(tableUnpartitioned, "path", ImmutableList.of(), ImmutableList.of(new Column("t_string", HIVE_STRING, Optional.empty(), Optional.empty()), new Column("t_tinyint", HIVE_BYTE, Optional.empty(), Optional.empty())), ImmutableMap.of(), unpartitionedPartitions, TupleDomain.all(), TRUE_CONSTANT, ImmutableMap.of(), TupleDomain.all(), Optional.empty(), Optional.empty(), false, "layout", Optional.empty(), false));
    // ZoneId.of validates the id before it is converted to the DateTimeZone stored in timeZone.
    timeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(ZoneId.of(timeZoneId)));
}
Also used : HiveColumnHandle.bucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(com.facebook.presto.spi.ColumnHandle) NullableValue(com.facebook.presto.common.predicate.NullableValue) DiscretePredicates(com.facebook.presto.spi.DiscretePredicates) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Constraint(com.facebook.presto.spi.Constraint) ConnectorTableLayout(com.facebook.presto.spi.ConnectorTableLayout) Column(com.facebook.presto.hive.metastore.Column) SortingColumn(com.facebook.presto.hive.metastore.SortingColumn) Subfield(com.facebook.presto.common.Subfield)

Aggregations

DiscretePredicates (com.facebook.presto.spi.DiscretePredicates)3 ColumnHandle (com.facebook.presto.spi.ColumnHandle)2 ConnectorTableLayout (com.facebook.presto.spi.ConnectorTableLayout)2 Constraint (com.facebook.presto.spi.Constraint)2 SchemaTableName (com.facebook.presto.spi.SchemaTableName)2 Subfield (com.facebook.presto.common.Subfield)1 NullableValue (com.facebook.presto.common.predicate.NullableValue)1 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)1 DwrfTableEncryptionProperties.forTable (com.facebook.presto.hive.DwrfTableEncryptionProperties.forTable)1 HiveBucketing.getHiveBucketHandle (com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle)1 HiveColumnHandle.bucketColumnHandle (com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle)1 HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable (com.facebook.presto.hive.HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable)1 HiveTableProperties.getEncryptTable (com.facebook.presto.hive.HiveTableProperties.getEncryptTable)1 HiveTableProperties.isExternalTable (com.facebook.presto.hive.HiveTableProperties.isExternalTable)1 HiveUtil.translateHiveUnsupportedTypeForTemporaryTable (com.facebook.presto.hive.HiveUtil.translateHiveUnsupportedTypeForTemporaryTable)1 HiveUtil.translateHiveUnsupportedTypesForTemporaryTable (com.facebook.presto.hive.HiveUtil.translateHiveUnsupportedTypesForTemporaryTable)1 Column (com.facebook.presto.hive.metastore.Column)1 MetastoreContext (com.facebook.presto.hive.metastore.MetastoreContext)1 SortingColumn (com.facebook.presto.hive.metastore.SortingColumn)1 Table (com.facebook.presto.hive.metastore.Table)1