Search in sources :

Example 16 with ColumnHandle

Use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

In the class HiveBucketing, method getHiveBucketFilter.

/**
 * Derives a bucket filter for {@code hiveTable} from {@code effectivePredicate}.
 *
 * <p>Two strategies are tried in order. First, the discrete values extracted from the predicate
 * are passed to {@code getHiveBuckets}. If that yields no answer, the domain attached to the
 * column named {@code BUCKET_COLUMN_NAME} is probed once for every bucket number in
 * {@code [0, bucketCount)}.
 *
 * @param hiveTable table handle supplying the bucket handle and the data columns
 * @param effectivePredicate predicate from which the filter is derived
 * @return the filter; empty when the table has no bucket handle, when no discrete values can be
 *         extracted from the predicate, or when the first strategy gives no answer and the
 *         predicate carries no domain for the bucket column
 */
public static Optional<HiveBucketFilter> getHiveBucketFilter(HiveTableHandle hiveTable, TupleDomain<ColumnHandle> effectivePredicate) {
    if (hiveTable.getBucketHandle().isEmpty()) {
        return Optional.empty();
    }
    HiveBucketProperty bucketProperty = hiveTable.getBucketHandle().get().toTableBucketProperty();
    List<Column> metastoreColumns = hiveTable.getDataColumns().stream()
            .map(HiveColumnHandle::toMetastoreColumn)
            .collect(toImmutableList());

    Optional<Map<ColumnHandle, List<NullableValue>>> discreteValues = TupleDomain.extractDiscreteValues(effectivePredicate);
    if (discreteValues.isEmpty()) {
        return Optional.empty();
    }

    // Strategy 1: buckets computed from the values bound to the bucketing columns.
    Optional<Set<Integer>> bucketsFromValues = getHiveBuckets(bucketProperty, metastoreColumns, discreteValues.get());
    if (bucketsFromValues.isPresent()) {
        return bucketsFromValues.map(bucketIds -> new HiveBucketFilter(bucketIds));
    }

    // Strategy 2: look for a domain placed directly on the bucket column.
    Optional<Map<ColumnHandle, Domain>> allDomains = effectivePredicate.getDomains();
    if (allDomains.isEmpty()) {
        return Optional.empty();
    }
    Domain bucketDomain = null;
    for (Entry<ColumnHandle, Domain> candidate : allDomains.get().entrySet()) {
        if (((HiveColumnHandle) candidate.getKey()).getName().equals(BUCKET_COLUMN_NAME)) {
            // Only the first matching entry is considered.
            bucketDomain = candidate.getValue();
            break;
        }
    }
    if (bucketDomain == null) {
        return Optional.empty();
    }

    ValueSet allowedBuckets = bucketDomain.getValues();
    int totalBuckets = bucketProperty.getBucketCount();
    ImmutableSet.Builder<Integer> selected = ImmutableSet.builder();
    for (int bucket = 0; bucket < totalBuckets; bucket++) {
        // The value set is queried with a long, hence the widening cast.
        if (allowedBuckets.containsValue((long) bucket)) {
            selected.add(bucket);
        }
    }
    return Optional.of(new HiveBucketFilter(selected.build()));
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) BUCKETING_V2(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V2) Lists.cartesianProduct(com.google.common.collect.Lists.cartesianProduct) BUCKETING_V1(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) ImmutableSet(com.google.common.collect.ImmutableSet) Table(io.trino.plugin.hive.metastore.Table) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) HiveTimestampPrecision(io.trino.plugin.hive.HiveTimestampPrecision) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) Objects(java.util.Objects) List(java.util.List) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) StandardErrorCode(io.trino.spi.StandardErrorCode) Entry(java.util.Map.Entry) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) NullableValue(io.trino.spi.predicate.NullableValue) Page(io.trino.spi.Page) HashMap(java.util.HashMap) HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty) HashSet(java.util.HashSet) HiveType(io.trino.plugin.hive.HiveType) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList(com.google.common.collect.ImmutableList) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ColumnHandle(io.trino.spi.connector.ColumnHandle) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) 
TABLE_BUCKETING_VERSION(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_BUCKETING_VERSION) HiveBucketHandle(io.trino.plugin.hive.HiveBucketHandle) HiveUtil.getRegularColumnHandles(io.trino.plugin.hive.util.HiveUtil.getRegularColumnHandles) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TypeManager(io.trino.spi.type.TypeManager) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) ValueSet(io.trino.spi.predicate.ValueSet) HashSet(java.util.HashSet) NullableValue(io.trino.spi.predicate.NullableValue) HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty) ImmutableSet(com.google.common.collect.ImmutableSet) Column(io.trino.plugin.hive.metastore.Column) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) Map(java.util.Map) HashMap(java.util.HashMap) ValueSet(io.trino.spi.predicate.ValueSet)

Example 17 with ColumnHandle

Use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

In the class HiveBucketing, method getHiveBuckets.

/**
 * Computes the buckets matching the given column bindings, provided every bucketing column is
 * bound and has a type listed in {@code SUPPORTED_TYPES_FOR_BUCKET_FILTER}.
 *
 * @param hiveBucketProperty supplies the bucketing columns, bucket count and bucketing version
 * @param dataColumns table columns, used to look up the Hive type of each bucketing column
 * @param bindings candidate values per column handle; keys are cast to {@code HiveColumnHandle}
 * @return the result of the overloaded {@code getHiveBuckets}, or empty when {@code bindings} is
 *         empty, a bucketing column has an unsupported type, or the number of bound bucketing
 *         columns differs from the number of bucketing columns
 */
private static Optional<Set<Integer>> getHiveBuckets(HiveBucketProperty hiveBucketProperty, List<Column> dataColumns, Map<ColumnHandle, List<NullableValue>> bindings) {
    if (bindings.isEmpty()) {
        return Optional.empty();
    }

    List<String> bucketedBy = hiveBucketProperty.getBucketedBy();

    // Index the Hive type of every data column by name.
    Map<String, HiveType> typesByName = new HashMap<>();
    dataColumns.forEach(dataColumn -> typesByName.put(dataColumn.getName(), dataColumn.getType()));

    // Give up as soon as one bucketing column has a type the bucket filter cannot handle.
    boolean hasUnsupportedType = bucketedBy.stream()
            .anyMatch(bucketColumn -> !SUPPORTED_TYPES_FOR_BUCKET_FILTER.contains(typesByName.get(bucketColumn)));
    if (hasUnsupportedType) {
        return Optional.empty();
    }

    // Keep only the bindings that belong to bucketing columns, keyed by column name.
    Map<String, List<NullableValue>> bindingsByName = new HashMap<>();
    bindings.forEach((handle, boundValues) -> {
        String columnName = ((HiveColumnHandle) handle).getName();
        if (bucketedBy.contains(columnName)) {
            bindingsByName.put(columnName, boundValues);
        }
    });

    // Every bucketing column must be bound, otherwise no bucket can be derived.
    if (bindingsByName.size() != bucketedBy.size()) {
        return Optional.empty();
    }

    // Arrange the bindings in bucketing-column order.
    ImmutableList.Builder<List<NullableValue>> bindingsInOrder = ImmutableList.builder();
    for (String bucketColumn : bucketedBy) {
        bindingsInOrder.add(bindingsByName.get(bucketColumn));
    }

    // Collect the matching TypeInfos in the same order.
    ImmutableList.Builder<TypeInfo> typeInfosInOrder = ImmutableList.builder();
    for (String bucketColumn : bucketedBy) {
        typeInfosInOrder.add(typesByName.get(bucketColumn).getTypeInfo());
    }

    return getHiveBuckets(
            hiveBucketProperty.getBucketingVersion(),
            hiveBucketProperty.getBucketCount(),
            typeInfosInOrder.build(),
            bindingsInOrder.build());
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) BUCKETING_V2(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V2) Lists.cartesianProduct(com.google.common.collect.Lists.cartesianProduct) BUCKETING_V1(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) ImmutableSet(com.google.common.collect.ImmutableSet) Table(io.trino.plugin.hive.metastore.Table) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) HiveTimestampPrecision(io.trino.plugin.hive.HiveTimestampPrecision) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) Objects(java.util.Objects) List(java.util.List) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) StandardErrorCode(io.trino.spi.StandardErrorCode) Entry(java.util.Map.Entry) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) NullableValue(io.trino.spi.predicate.NullableValue) Page(io.trino.spi.Page) HashMap(java.util.HashMap) HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty) HashSet(java.util.HashSet) HiveType(io.trino.plugin.hive.HiveType) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList(com.google.common.collect.ImmutableList) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ColumnHandle(io.trino.spi.connector.ColumnHandle) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) 
TABLE_BUCKETING_VERSION(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_BUCKETING_VERSION) HiveBucketHandle(io.trino.plugin.hive.HiveBucketHandle) HiveUtil.getRegularColumnHandles(io.trino.plugin.hive.util.HiveUtil.getRegularColumnHandles) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TypeManager(io.trino.spi.type.TypeManager) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) HashMap(java.util.HashMap) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Column(io.trino.plugin.hive.metastore.Column) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HiveType(io.trino.plugin.hive.HiveType) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle)

Example 18 with ColumnHandle

Use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

In the class AbstractTestHive, method assertGetRecords.

/**
 * Opens a page source over the split obtained for {@code tableName} in {@code database} and
 * delegates the verification to the five-argument {@code assertGetRecords} overload.
 *
 * @param tableName name of the table to read, resolved within {@code database}
 * @param hiveStorageFormat storage format forwarded to the overload
 * @throws Exception if any step fails
 */
protected void assertGetRecords(String tableName, HiveStorageFormat hiveStorageFormat) throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);

        SchemaTableName qualifiedName = new SchemaTableName(database, tableName);
        ConnectorTableHandle table = getTableHandle(metadata, qualifiedName);
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, table);
        HiveSplit split = getHiveSplit(table, transaction, session);

        // All column handles are requested, with no dynamic filtering applied.
        List<ColumnHandle> allColumns = ImmutableList.copyOf(metadata.getColumnHandles(session, table).values());
        ConnectorPageSource pageSource = pageSourceProvider.createPageSource(
                transaction.getTransactionHandle(),
                session,
                split,
                table,
                allColumns,
                DynamicFilter.EMPTY);

        assertGetRecords(hiveStorageFormat, tableMetadata, split, pageSource, allColumns);
    }
}
Also used : HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle)

Example 19 with ColumnHandle

Use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

In the class AbstractTestHive, method testBucketedTableBigintBoolean.

/**
 * Reads the table referenced by {@code tableBucketedBigintBoolean} with fixed values bound to
 * {@code t_string}, {@code t_bigint} and {@code t_boolean}, expecting exactly one split, and
 * checks that a row carrying those three values is returned. Also checks that the table reports
 * no local properties.
 */
@SuppressWarnings("ConstantConditions")
@Test
public void testBucketedTableBigintBoolean() throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);

        ConnectorTableHandle bucketedTable = getTableHandle(metadata, tableBucketedBigintBoolean);
        List<ColumnHandle> columns = ImmutableList.copyOf(metadata.getColumnHandles(session, bucketedTable).values());
        Map<String, Integer> positions = indexColumns(columns);
        assertTableIsBucketed(bucketedTable, transaction, session);

        ConnectorTableProperties sortingAwareProperties = metadata.getTableProperties(
                newSession(ImmutableMap.of("propagate_table_scan_sorting_properties", true)),
                bucketedTable);
        // trino_test_bucketed_by_bigint_boolean declares no sorting, so no local properties are expected
        assertTrue(sortingAwareProperties.getLocalProperties().isEmpty());
        assertTrue(metadata.getTableProperties(newSession(), bucketedTable).getLocalProperties().isEmpty());

        String expectedString = "test";
        Long expectedBigint = 89L;
        Boolean expectedBoolean = true;

        // Bind one fixed value to each of the three columns.
        ImmutableMap.Builder<ColumnHandle, NullableValue> fixedValues = ImmutableMap.builder();
        fixedValues.put(columns.get(positions.get("t_string")), NullableValue.of(createUnboundedVarcharType(), utf8Slice(expectedString)));
        fixedValues.put(columns.get(positions.get("t_bigint")), NullableValue.of(BIGINT, expectedBigint));
        fixedValues.put(columns.get(positions.get("t_boolean")), NullableValue.of(BOOLEAN, expectedBoolean));
        ImmutableMap<ColumnHandle, NullableValue> bindings = fixedValues.buildOrThrow();

        MaterializedResult rows = readTable(
                transaction,
                bucketedTable,
                columns,
                session,
                TupleDomain.fromFixedValues(bindings),
                OptionalInt.of(1),
                Optional.empty());

        // Scan until the first row whose three fields all equal the bound values.
        boolean matchFound = false;
        for (MaterializedRow candidate : rows) {
            if (!expectedString.equals(candidate.getField(positions.get("t_string")))) {
                continue;
            }
            if (!expectedBigint.equals(candidate.getField(positions.get("t_bigint")))) {
                continue;
            }
            if (!expectedBoolean.equals(candidate.getField(positions.get("t_boolean")))) {
                continue;
            }
            matchFound = true;
            break;
        }
        assertTrue(matchFound);
    }
}
Also used : HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) NullableValue(io.trino.spi.predicate.NullableValue) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) OptionalLong(java.util.OptionalLong) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) MaterializedRow(io.trino.testing.MaterializedRow) Test(org.testng.annotations.Test)

Example 20 with ColumnHandle

Use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

In the class AbstractTestHive, method assertReadFailsWithMessageMatching.

/**
 * Asserts that reading {@code tableName} with an unconstrained predicate fails with error code
 * {@code HIVE_INVALID_BUCKET_FILES} and a message matching {@code regex}.
 *
 * @param storageFormat storage format passed to {@code readTable}, wrapped in an {@code Optional}
 * @param tableName table to read
 * @param regex regular expression the failure message must match
 */
protected void assertReadFailsWithMessageMatching(HiveStorageFormat storageFormat, SchemaTableName tableName, String regex) {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);

        ConnectorTableHandle table = getTableHandle(metadata, tableName);
        // Only the handles kept by filterNonHiddenColumnHandles are read.
        List<ColumnHandle> visibleColumns = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, table).values());

        assertTrinoExceptionThrownBy(
                () -> readTable(transaction, table, visibleColumns, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat)))
                .hasErrorCode(HIVE_INVALID_BUCKET_FILES)
                .hasMessageMatching(regex);
    }
}
Also used : HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle)

Aggregations

ColumnHandle (io.trino.spi.connector.ColumnHandle)268 Test (org.testng.annotations.Test)121 ImmutableList (com.google.common.collect.ImmutableList)106 TupleDomain (io.trino.spi.predicate.TupleDomain)104 ImmutableMap (com.google.common.collect.ImmutableMap)93 ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle)86 Domain (io.trino.spi.predicate.Domain)86 Map (java.util.Map)78 ConnectorSession (io.trino.spi.connector.ConnectorSession)76 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)71 Optional (java.util.Optional)70 List (java.util.List)69 SchemaTableName (io.trino.spi.connector.SchemaTableName)66 Constraint (io.trino.spi.connector.Constraint)61 Objects.requireNonNull (java.util.Objects.requireNonNull)53 ImmutableSet (com.google.common.collect.ImmutableSet)51 NullableValue (io.trino.spi.predicate.NullableValue)50 Type (io.trino.spi.type.Type)48 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)45 ColumnMetadata (io.trino.spi.connector.ColumnMetadata)45