Search in sources:

Example 1 with HiveBucketProperty

use of io.trino.plugin.hive.HiveBucketProperty in project trino by trinodb.

The method getHiveBucketFilter of the class HiveBucketing.

/**
 * Derives the set of buckets that {@code effectivePredicate} can match on a bucketed table.
 *
 * <p>Two strategies are tried in order. First, the discrete values extracted from the predicate
 * are handed to {@code getHiveBuckets}. If that produces no result, the domain attached to the
 * column named {@code BUCKET_COLUMN_NAME} is consulted, and every bucket number in
 * {@code [0, bucketCount)} contained in that domain's value set is kept.
 *
 * @param hiveTable table handle supplying the bucket handle and the data columns
 * @param effectivePredicate predicate to inspect
 * @return a filter holding the bucket numbers to keep; empty when the table has no bucket handle,
 *         when no discrete values can be extracted from the predicate, or when the first strategy
 *         yields nothing and the predicate carries no domain for the bucket column
 */
public static Optional<HiveBucketFilter> getHiveBucketFilter(HiveTableHandle hiveTable, TupleDomain<ColumnHandle> effectivePredicate) {
    if (hiveTable.getBucketHandle().isEmpty()) {
        return Optional.empty();
    }
    HiveBucketProperty bucketProperty = hiveTable.getBucketHandle().get().toTableBucketProperty();
    List<Column> metastoreColumns = hiveTable.getDataColumns().stream()
            .map(HiveColumnHandle::toMetastoreColumn)
            .collect(toImmutableList());

    Optional<Map<ColumnHandle, List<NullableValue>>> discreteValues = TupleDomain.extractDiscreteValues(effectivePredicate);
    if (discreteValues.isEmpty()) {
        return Optional.empty();
    }

    // Strategy 1: compute buckets from the discrete values bound to the bucketing columns.
    Optional<Set<Integer>> bucketsFromValues = getHiveBuckets(bucketProperty, metastoreColumns, discreteValues.get());
    if (bucketsFromValues.isPresent()) {
        return Optional.of(new HiveBucketFilter(bucketsFromValues.get()));
    }

    // Strategy 2: look for a domain placed directly on the bucket column.
    Optional<Map<ColumnHandle, Domain>> domains = effectivePredicate.getDomains();
    if (domains.isEmpty()) {
        return Optional.empty();
    }
    Domain bucketDomain = null;
    for (Entry<ColumnHandle, Domain> candidate : domains.get().entrySet()) {
        HiveColumnHandle handle = (HiveColumnHandle) candidate.getKey();
        if (handle.getName().equals(BUCKET_COLUMN_NAME)) {
            // Only the first matching entry is used.
            bucketDomain = candidate.getValue();
            break;
        }
    }
    if (bucketDomain == null) {
        return Optional.empty();
    }

    ValueSet allowedValues = bucketDomain.getValues();
    ImmutableSet.Builder<Integer> keptBuckets = ImmutableSet.builder();
    int totalBuckets = bucketProperty.getBucketCount();
    for (int bucket = 0; bucket < totalBuckets; bucket++) {
        // The value set is probed with a long, so the bucket number is widened before the lookup.
        if (allowedValues.containsValue((long) bucket)) {
            keptBuckets.add(bucket);
        }
    }
    return Optional.of(new HiveBucketFilter(keptBuckets.build()));
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) BUCKETING_V2(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V2) Lists.cartesianProduct(com.google.common.collect.Lists.cartesianProduct) BUCKETING_V1(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) ImmutableSet(com.google.common.collect.ImmutableSet) Table(io.trino.plugin.hive.metastore.Table) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) HiveTimestampPrecision(io.trino.plugin.hive.HiveTimestampPrecision) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) Objects(java.util.Objects) List(java.util.List) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) StandardErrorCode(io.trino.spi.StandardErrorCode) Entry(java.util.Map.Entry) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) NullableValue(io.trino.spi.predicate.NullableValue) Page(io.trino.spi.Page) HashMap(java.util.HashMap) HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty) HashSet(java.util.HashSet) HiveType(io.trino.plugin.hive.HiveType) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList(com.google.common.collect.ImmutableList) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ColumnHandle(io.trino.spi.connector.ColumnHandle) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) 
TABLE_BUCKETING_VERSION(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_BUCKETING_VERSION) HiveBucketHandle(io.trino.plugin.hive.HiveBucketHandle) HiveUtil.getRegularColumnHandles(io.trino.plugin.hive.util.HiveUtil.getRegularColumnHandles) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TypeManager(io.trino.spi.type.TypeManager) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) ValueSet(io.trino.spi.predicate.ValueSet) HashSet(java.util.HashSet) NullableValue(io.trino.spi.predicate.NullableValue) HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty) ImmutableSet(com.google.common.collect.ImmutableSet) Column(io.trino.plugin.hive.metastore.Column) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) Map(java.util.Map) HashMap(java.util.HashMap) ValueSet(io.trino.spi.predicate.ValueSet)

Example 2 with HiveBucketProperty

use of io.trino.plugin.hive.HiveBucketProperty in project trino by trinodb.

The method getHiveBuckets of the class HiveBucketing.

/**
 * Resolves the buckets selected by a set of discrete column bindings.
 *
 * <p>The bindings are usable only when every bucketing column has a type contained in
 * {@code SUPPORTED_TYPES_FOR_BUCKET_FILTER} and a binding is present for each bucketing column.
 * When both hold, the bindings and type infos are arranged in bucketing-column order and passed
 * to the overload that takes the bucketing version and bucket count.
 *
 * @param hiveBucketProperty bucketing definition (bucketed-by columns, version, bucket count)
 * @param dataColumns table data columns, used to look up each bucketing column's type
 * @param bindings candidate values per column handle
 * @return the result of the delegate overload, or empty when {@code bindings} is empty, a
 *         bucketing column has an unsupported (or unknown) type, or a bucketing column is unbound
 */
private static Optional<Set<Integer>> getHiveBuckets(HiveBucketProperty hiveBucketProperty, List<Column> dataColumns, Map<ColumnHandle, List<NullableValue>> bindings) {
    if (bindings.isEmpty()) {
        return Optional.empty();
    }

    // Names of the columns the table is bucketed by
    List<String> bucketedBy = hiveBucketProperty.getBucketedBy();

    // Index column types by name so bucketing column types can be looked up
    Map<String, HiveType> typeByColumn = new HashMap<>();
    dataColumns.forEach(column -> typeByColumn.put(column.getName(), column.getType()));

    // Give up if any bucketing column has a type the bucket filter does not support
    boolean hasUnsupportedType = bucketedBy.stream()
            .anyMatch(name -> !SUPPORTED_TYPES_FOR_BUCKET_FILTER.contains(typeByColumn.get(name)));
    if (hasUnsupportedType) {
        return Optional.empty();
    }

    // Keep only the bindings that belong to bucketing columns
    Map<String, List<NullableValue>> bindingByColumn = new HashMap<>();
    bindings.forEach((handle, values) -> {
        String name = ((HiveColumnHandle) handle).getName();
        if (bucketedBy.contains(name)) {
            bindingByColumn.put(name, values);
        }
    });

    // Every bucketing column must be bound
    if (bindingByColumn.size() != bucketedBy.size()) {
        return Optional.empty();
    }

    // Arrange bindings and type infos in bucketing-column order
    List<List<NullableValue>> orderedBindings = bucketedBy.stream()
            .map(bindingByColumn::get)
            .collect(toImmutableList());
    List<TypeInfo> typeInfos = bucketedBy.stream()
            .map(typeByColumn::get)
            .map(HiveType::getTypeInfo)
            .collect(toImmutableList());

    return getHiveBuckets(hiveBucketProperty.getBucketingVersion(), hiveBucketProperty.getBucketCount(), typeInfos, orderedBindings);
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) BUCKETING_V2(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V2) Lists.cartesianProduct(com.google.common.collect.Lists.cartesianProduct) BUCKETING_V1(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) ImmutableSet(com.google.common.collect.ImmutableSet) Table(io.trino.plugin.hive.metastore.Table) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) HiveTimestampPrecision(io.trino.plugin.hive.HiveTimestampPrecision) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) Objects(java.util.Objects) List(java.util.List) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) StandardErrorCode(io.trino.spi.StandardErrorCode) Entry(java.util.Map.Entry) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) NullableValue(io.trino.spi.predicate.NullableValue) Page(io.trino.spi.Page) HashMap(java.util.HashMap) HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty) HashSet(java.util.HashSet) HiveType(io.trino.plugin.hive.HiveType) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList(com.google.common.collect.ImmutableList) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ColumnHandle(io.trino.spi.connector.ColumnHandle) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) 
TABLE_BUCKETING_VERSION(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_BUCKETING_VERSION) HiveBucketHandle(io.trino.plugin.hive.HiveBucketHandle) HiveUtil.getRegularColumnHandles(io.trino.plugin.hive.util.HiveUtil.getRegularColumnHandles) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TypeManager(io.trino.spi.type.TypeManager) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) HashMap(java.util.HashMap) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Column(io.trino.plugin.hive.metastore.Column) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HiveType(io.trino.plugin.hive.HiveType) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle)

Example 3 with HiveBucketProperty

use of io.trino.plugin.hive.HiveBucketProperty in project trino by trinodb.

The method assertStorage of the class TestGlueInputConverter.

/**
 * Asserts that a converted {@code StorageDescriptor} carries the same location, serialization
 * library, input/output formats and, when the expected storage defines one, bucketing
 * information as the {@code Storage} it was produced from.
 *
 * @param actual storage descriptor under test
 * @param expected storage holding the reference values
 */
private static void assertStorage(StorageDescriptor actual, Storage expected) {
    assertEquals(actual.getLocation(), expected.getLocation());
    assertEquals(actual.getSerdeInfo().getSerializationLibrary(), expected.getStorageFormat().getSerde());
    assertEquals(actual.getInputFormat(), expected.getStorageFormat().getInputFormat());
    assertEquals(actual.getOutputFormat(), expected.getStorageFormat().getOutputFormat());

    // Bucketing is compared only when the expected storage declares a bucket property.
    expected.getBucketProperty().ifPresent(bucketProperty -> {
        assertEquals(actual.getBucketColumns(), bucketProperty.getBucketedBy());
        assertEquals(actual.getNumberOfBuckets().intValue(), bucketProperty.getBucketCount());
    });
}
Also used : HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty)

Example 4 with HiveBucketProperty

use of io.trino.plugin.hive.HiveBucketProperty in project trino by trinodb.

The method assertStorage of the class TestGlueToTrinoConverter.

/**
 * Asserts that a converted {@code Storage} carries the same location, serialization library,
 * input/output formats and, when the expected descriptor lists bucket columns, bucketing
 * information as the {@code StorageDescriptor} it was produced from.
 *
 * @param actual storage under test
 * @param expected storage descriptor holding the reference values
 * @throws AssertionError if any compared value differs, or if the expected descriptor lists
 *         bucket columns but {@code actual} has no bucket property
 */
private static void assertStorage(Storage actual, StorageDescriptor expected) {
    assertEquals(actual.getLocation(), expected.getLocation());
    assertEquals(actual.getStorageFormat().getSerde(), expected.getSerdeInfo().getSerializationLibrary());
    assertEquals(actual.getStorageFormat().getInputFormat(), expected.getInputFormat());
    assertEquals(actual.getStorageFormat().getOutputFormat(), expected.getOutputFormat());
    if (!isNullOrEmpty(expected.getBucketColumns())) {
        // Fail with a descriptive assertion error instead of a bare NoSuchElementException
        // when the bucket property is missing from the converted storage.
        HiveBucketProperty bucketProperty = actual.getBucketProperty()
                .orElseThrow(() -> new AssertionError("Expected a bucket property for bucket columns " + expected.getBucketColumns() + " but none was present"));
        assertEquals(bucketProperty.getBucketedBy(), expected.getBucketColumns());
        assertEquals(bucketProperty.getBucketCount(), expected.getNumberOfBuckets().intValue());
    }
}
Also used : HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty)

Example 5 with HiveBucketProperty

use of io.trino.plugin.hive.HiveBucketProperty in project trino by trinodb.

The method getHiveBucketHandle of the class HiveBucketing.

/**
 * Builds the bucket handle for a table from the bucket property stored with it.
 *
 * @param session session from which the timestamp precision is read
 * @param table metastore table to inspect
 * @param typeManager type manager passed through to {@code getRegularColumnHandles}
 * @return the bucket handle; empty when the table parameters contain
 *         {@code SPARK_TABLE_PROVIDER_KEY}, when the table storage has no bucket property, or
 *         when {@code isSupportedBucketing} rejects the table
 * @throws TrinoException with {@code HIVE_INVALID_METADATA} if a bucketed-by column name does
 *         not match any regular column of the table
 */
public static Optional<HiveBucketHandle> getHiveBucketHandle(ConnectorSession session, Table table, TypeManager typeManager) {
    if (table.getParameters().containsKey(SPARK_TABLE_PROVIDER_KEY)) {
        return Optional.empty();
    }

    Optional<HiveBucketProperty> declaredBucketing = table.getStorage().getBucketProperty();
    // Short-circuit keeps the original order: presence is checked before bucketing support.
    if (declaredBucketing.isEmpty() || !isSupportedBucketing(table)) {
        return Optional.empty();
    }
    HiveBucketProperty bucketProperty = declaredBucketing.get();

    // Regular column handles indexed by column name
    Map<String, HiveColumnHandle> handlesByName = getRegularColumnHandles(table, typeManager, getTimestampPrecision(session)).stream()
            .collect(Collectors.toMap(HiveColumnHandle::getName, identity()));

    // Resolve each bucketed-by name to its handle, preserving the declared order
    ImmutableList.Builder<HiveColumnHandle> bucketColumnHandles = ImmutableList.builder();
    for (String columnName : bucketProperty.getBucketedBy()) {
        HiveColumnHandle handle = handlesByName.get(columnName);
        if (handle == null) {
            throw new TrinoException(HIVE_INVALID_METADATA, format("Table '%s.%s' is bucketed on non-existent column '%s'", table.getDatabaseName(), table.getTableName(), columnName));
        }
        bucketColumnHandles.add(handle);
    }

    // The same bucket count is supplied for both count arguments of the handle.
    int bucketCount = bucketProperty.getBucketCount();
    return Optional.of(new HiveBucketHandle(
            bucketColumnHandles.build(),
            bucketProperty.getBucketingVersion(),
            bucketCount,
            bucketCount,
            bucketProperty.getSortedBy()));
}
Also used : SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) HiveTimestampPrecision(io.trino.plugin.hive.HiveTimestampPrecision) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty) HiveBucketHandle(io.trino.plugin.hive.HiveBucketHandle) TrinoException(io.trino.spi.TrinoException) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle)

Aggregations

HiveBucketProperty (io.trino.plugin.hive.HiveBucketProperty)8 SortingColumn (io.trino.plugin.hive.metastore.SortingColumn)4 ImmutableList (com.google.common.collect.ImmutableList)3 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)3 HiveBucketHandle (io.trino.plugin.hive.HiveBucketHandle)3 HiveColumnHandle (io.trino.plugin.hive.HiveColumnHandle)3 HiveTimestampPrecision (io.trino.plugin.hive.HiveTimestampPrecision)3 TrinoException (io.trino.spi.TrinoException)3 JsonCreator (com.fasterxml.jackson.annotation.JsonCreator)2 JsonProperty (com.fasterxml.jackson.annotation.JsonProperty)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 ImmutableSet (com.google.common.collect.ImmutableSet)2 Lists.cartesianProduct (com.google.common.collect.Lists.cartesianProduct)2 BUCKET_COLUMN_NAME (io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME)2 HIVE_INVALID_METADATA (io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA)2 HiveSessionProperties.getTimestampPrecision (io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision)2 HiveTableHandle (io.trino.plugin.hive.HiveTableHandle)2 HiveType (io.trino.plugin.hive.HiveType)2 Column (io.trino.plugin.hive.metastore.Column)2 Table (io.trino.plugin.hive.metastore.Table)2