Search in sources :

Example 6 with HiveType

use of io.trino.plugin.hive.HiveType in project trino by trinodb.

the class HiveBucketing method getHiveBuckets.

/**
 * Resolves the bucket-column bindings and delegates to the TypeInfo-based
 * {@code getHiveBuckets} overload.
 *
 * @param hiveBucketProperty supplies the bucket column names, bucket count and bucketing version
 * @param dataColumns columns used to look up the Hive type of each bucket column by name
 * @param bindings values bound to each column handle; every key is cast to {@code HiveColumnHandle}
 * @return {@code Optional.empty()} when there are no bindings, when a bucket column's type is not in
 *         {@code SUPPORTED_TYPES_FOR_BUCKET_FILTER}, or when not every bucket column has a binding;
 *         otherwise whatever the delegated overload returns
 */
private static Optional<Set<Integer>> getHiveBuckets(HiveBucketProperty hiveBucketProperty, List<Column> dataColumns, Map<ColumnHandle, List<NullableValue>> bindings) {
    // Nothing is bound, so there is nothing to resolve
    if (bindings.isEmpty()) {
        return Optional.empty();
    }

    List<String> bucketedBy = hiveBucketProperty.getBucketedBy();

    // Index the Hive type of every data column by its name (a later duplicate name overwrites an earlier one)
    Map<String, HiveType> typeByColumnName = new HashMap<>();
    dataColumns.forEach(dataColumn -> typeByColumnName.put(dataColumn.getName(), dataColumn.getType()));

    // Give up as soon as one bucket column has a type outside the supported set
    boolean everyBucketTypeSupported = bucketedBy.stream()
            .map(typeByColumnName::get)
            .allMatch(SUPPORTED_TYPES_FOR_BUCKET_FILTER::contains);
    if (!everyBucketTypeSupported) {
        return Optional.empty();
    }

    // Keep only the bindings that belong to bucket columns, keyed by column name
    Map<String, List<NullableValue>> bindingsByBucketColumn = new HashMap<>();
    for (Entry<ColumnHandle, List<NullableValue>> binding : bindings.entrySet()) {
        String boundColumnName = ((HiveColumnHandle) binding.getKey()).getName();
        if (bucketedBy.contains(boundColumnName)) {
            bindingsByBucketColumn.put(boundColumnName, binding.getValue());
        }
    }

    // Every bucket column must have a binding
    if (bucketedBy.size() != bindingsByBucketColumn.size()) {
        return Optional.empty();
    }

    // Arrange bindings and TypeInfos in the order the bucket columns are declared
    List<List<NullableValue>> bindingsInBucketOrder = bucketedBy.stream()
            .map(bucketColumn -> bindingsByBucketColumn.get(bucketColumn))
            .collect(toImmutableList());
    List<TypeInfo> typeInfosInBucketOrder = bucketedBy.stream()
            .map(typeByColumnName::get)
            .map(HiveType::getTypeInfo)
            .collect(toImmutableList());

    return getHiveBuckets(
            hiveBucketProperty.getBucketingVersion(),
            hiveBucketProperty.getBucketCount(),
            typeInfosInBucketOrder,
            bindingsInBucketOrder);
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) BUCKETING_V2(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V2) Lists.cartesianProduct(com.google.common.collect.Lists.cartesianProduct) BUCKETING_V1(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) ImmutableSet(com.google.common.collect.ImmutableSet) Table(io.trino.plugin.hive.metastore.Table) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) HiveTimestampPrecision(io.trino.plugin.hive.HiveTimestampPrecision) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) ValueSet(io.trino.spi.predicate.ValueSet) Objects(java.util.Objects) List(java.util.List) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) StandardErrorCode(io.trino.spi.StandardErrorCode) Entry(java.util.Map.Entry) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) NullableValue(io.trino.spi.predicate.NullableValue) Page(io.trino.spi.Page) HashMap(java.util.HashMap) HiveBucketProperty(io.trino.plugin.hive.HiveBucketProperty) HashSet(java.util.HashSet) HiveType(io.trino.plugin.hive.HiveType) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList(com.google.common.collect.ImmutableList) HiveTableHandle(io.trino.plugin.hive.HiveTableHandle) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ColumnHandle(io.trino.spi.connector.ColumnHandle) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) 
TABLE_BUCKETING_VERSION(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_BUCKETING_VERSION) HiveBucketHandle(io.trino.plugin.hive.HiveBucketHandle) HiveUtil.getRegularColumnHandles(io.trino.plugin.hive.util.HiveUtil.getRegularColumnHandles) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) ConnectorSession(io.trino.spi.connector.ConnectorSession) TupleDomain(io.trino.spi.predicate.TupleDomain) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) VisibleForTesting(com.google.common.annotations.VisibleForTesting) TypeManager(io.trino.spi.type.TypeManager) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) HashMap(java.util.HashMap) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Column(io.trino.plugin.hive.metastore.Column) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HiveType(io.trino.plugin.hive.HiveType) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle)

Example 7 with HiveType

use of io.trino.plugin.hive.HiveType in project trino by trinodb.

the class AbstractFileFormat method createSchema.

/**
 * Builds the schema {@link Properties} for the given storage format and columns.
 *
 * @param format supplies the serde and input format class names
 * @param columnNames column names, written comma-separated under {@code META_TABLE_COLUMNS}
 * @param columnTypes column types, converted to Hive type names and written colon-separated
 *        under {@code META_TABLE_COLUMN_TYPES}
 * @return a new {@code Properties} holding the four entries above
 */
static Properties createSchema(HiveStorageFormat format, List<String> columnNames, List<Type> columnTypes) {
    Properties tableSchema = new Properties();
    tableSchema.setProperty(SERIALIZATION_LIB, format.getSerde());
    tableSchema.setProperty(FILE_INPUT_FORMAT, format.getInputFormat());

    String commaSeparatedNames = join(",", columnNames);
    tableSchema.setProperty(META_TABLE_COLUMNS, commaSeparatedNames);

    // Each type is rendered through its Hive type name; entries are separated by ':'
    String colonSeparatedTypes = columnTypes.stream()
            .map(columnType -> HiveType.toHiveType(columnType).getHiveTypeName().toString())
            .collect(joining(":"));
    tableSchema.setProperty(META_TABLE_COLUMN_TYPES, colonSeparatedTypes);

    return tableSchema;
}
Also used : HiveTypeName(io.trino.plugin.hive.HiveTypeName) Properties(java.util.Properties) HiveType(io.trino.plugin.hive.HiveType) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType)

Example 8 with HiveType

use of io.trino.plugin.hive.HiveType in project trino by trinodb.

the class TestHiveBucketing method computeTrino.

/**
 * Computes the bucket hash code through both {@code getBucketHashCode} overloads
 * (page-based and native-value-based) and asserts that they agree.
 *
 * @param bucketingVersion bucketing version whose hash function is exercised
 * @param hiveTypeStrings only its size is used, as the number of columns to process
 * @param hiveValues one value per column
 * @param hiveTypes one Hive type per column, resolved with {@code TESTING_TYPE_MANAGER}
 * @param hiveTypeInfos type infos handed to both overloads
 * @return the hash code produced by the page-based overload
 */
private static int computeTrino(BucketingVersion bucketingVersion, List<String> hiveTypeStrings, List<Object> hiveValues, List<HiveType> hiveTypes, List<TypeInfo> hiveTypeInfos) {
    Object[] nativeContainerValues = new Object[hiveValues.size()];
    int columnCount = hiveTypeStrings.size();
    Block[] columnBlocks = new Block[columnCount];

    for (int column = 0; column < columnCount; column++) {
        Object value = hiveValues.get(column);
        Type trinoType = hiveTypes.get(column).getType(TESTING_TYPE_MANAGER);

        // Put two nulls ahead of the value so that the value sits at position 2;
        // this makes sure the position argument is respected by the HiveBucketing function
        BlockBuilder builder = trinoType.createBlockBuilder(null, 3);
        builder.appendNull();
        builder.appendNull();
        appendToBlockBuilder(trinoType, value, builder);

        columnBlocks[column] = builder.build();
        nativeContainerValues[column] = toNativeContainerValue(trinoType, value);
    }

    int hashFromPage = bucketingVersion.getBucketHashCode(hiveTypeInfos, new Page(columnBlocks), 2);
    int hashFromValues = bucketingVersion.getBucketHashCode(hiveTypeInfos, nativeContainerValues);
    assertEquals(hashFromPage, hashFromValues, "overloads of getBucketHashCode produced different result");
    return hashFromPage;
}
Also used : DateType(io.trino.spi.type.DateType) BooleanType(io.trino.spi.type.BooleanType) TimestampType.createTimestampType(io.trino.spi.type.TimestampType.createTimestampType) SmallintType(io.trino.spi.type.SmallintType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) DoubleType(io.trino.spi.type.DoubleType) Type(io.trino.spi.type.Type) TimestampType(io.trino.spi.type.TimestampType) HiveType(io.trino.plugin.hive.HiveType) BigintType(io.trino.spi.type.BigintType) VarcharType(io.trino.spi.type.VarcharType) TinyintType(io.trino.spi.type.TinyintType) IntegerType(io.trino.spi.type.IntegerType) MapType(io.trino.spi.type.MapType) RealType(io.trino.spi.type.RealType) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) Block(io.trino.spi.block.Block) Page(io.trino.spi.Page) BlockBuilder(io.trino.spi.block.BlockBuilder)

Example 9 with HiveType

use of io.trino.plugin.hive.HiveType in project trino by trinodb.

the class HiveCoercionPolicy method canCoerce.

/**
 * Decides whether a value of {@code fromHiveType} may be coerced to {@code toHiveType}.
 *
 * The rules are checked in order and the first matching source/target category decides:
 * varchar source, varchar target, byte/short/int widening, float, double, decimal,
 * and finally the list/map/struct helpers.
 *
 * @param fromHiveType the source Hive type
 * @param toHiveType the target Hive type
 * @return {@code true} if the coercion is allowed by the rules below
 */
private boolean canCoerce(HiveType fromHiveType, HiveType toHiveType) {
    Type fromType = typeManager.getType(fromHiveType.getTypeSignature());
    Type toType = typeManager.getType(toHiveType.getTypeSignature());

    boolean targetIsVarchar = toType instanceof VarcharType;
    boolean targetIsDecimal = toType instanceof DecimalType;

    // varchar -> varchar, or varchar -> any integral type
    if (fromType instanceof VarcharType) {
        if (targetIsVarchar) {
            return true;
        }
        return toHiveType.equals(HIVE_BYTE)
                || toHiveType.equals(HIVE_SHORT)
                || toHiveType.equals(HIVE_INT)
                || toHiveType.equals(HIVE_LONG);
    }

    // any integral type -> varchar
    if (targetIsVarchar) {
        return fromHiveType.equals(HIVE_BYTE)
                || fromHiveType.equals(HIVE_SHORT)
                || fromHiveType.equals(HIVE_INT)
                || fromHiveType.equals(HIVE_LONG);
    }

    // integral widening: byte -> short/int/long, short -> int/long, int -> long
    if (fromHiveType.equals(HIVE_BYTE)) {
        return toHiveType.equals(HIVE_SHORT)
                || toHiveType.equals(HIVE_INT)
                || toHiveType.equals(HIVE_LONG);
    }
    if (fromHiveType.equals(HIVE_SHORT)) {
        return toHiveType.equals(HIVE_INT)
                || toHiveType.equals(HIVE_LONG);
    }
    if (fromHiveType.equals(HIVE_INT)) {
        return toHiveType.equals(HIVE_LONG);
    }

    // float <-> double, and either one -> decimal
    if (fromHiveType.equals(HIVE_FLOAT)) {
        return toHiveType.equals(HIVE_DOUBLE) || targetIsDecimal;
    }
    if (fromHiveType.equals(HIVE_DOUBLE)) {
        return toHiveType.equals(HIVE_FLOAT) || targetIsDecimal;
    }

    // decimal -> decimal, float or double
    if (fromType instanceof DecimalType) {
        return targetIsDecimal
                || toHiveType.equals(HIVE_FLOAT)
                || toHiveType.equals(HIVE_DOUBLE);
    }

    // Anything else is left to the list, map and struct helpers, tried in that order
    if (canCoerceForList(fromHiveType, toHiveType)) {
        return true;
    }
    if (canCoerceForMap(fromHiveType, toHiveType)) {
        return true;
    }
    return canCoerceForStruct(fromHiveType, toHiveType);
}
Also used : Type(io.trino.spi.type.Type) HiveType(io.trino.plugin.hive.HiveType) VarcharType(io.trino.spi.type.VarcharType) DecimalType(io.trino.spi.type.DecimalType) VarcharType(io.trino.spi.type.VarcharType) DecimalType(io.trino.spi.type.DecimalType)

Example 10 with HiveType

use of io.trino.plugin.hive.HiveType in project trino by trinodb.

the class HiveUtil method getPartitionKeyColumnHandles.

/**
 * Creates one {@code PARTITION_KEY} column handle per partition column of the table.
 *
 * @param table table whose partition columns are converted
 * @param typeManager used to resolve each partition column's Hive type
 * @return the handles, in the order of {@code table.getPartitionColumns()}
 * @throws TrinoException with {@code NOT_SUPPORTED} if a partition column's Hive type is not
 *         supported for the table's storage format
 */
public static List<HiveColumnHandle> getPartitionKeyColumnHandles(Table table, TypeManager typeManager) {
    ImmutableList.Builder<HiveColumnHandle> handles = ImmutableList.builder();
    for (Column partitionColumn : table.getPartitionColumns()) {
        HiveType partitionType = partitionColumn.getType();

        boolean supported = partitionType.isSupportedType(table.getStorage().getStorageFormat());
        if (!supported) {
            String message = format(
                    "Unsupported Hive type %s found in partition keys of table %s.%s",
                    partitionType,
                    table.getDatabaseName(),
                    table.getTableName());
            throw new TrinoException(NOT_SUPPORTED, message);
        }

        // Every partition key handle is created with index -1
        handles.add(createBaseColumn(
                partitionColumn.getName(),
                -1,
                partitionType,
                partitionType.getType(typeManager),
                PARTITION_KEY,
                partitionColumn.getComment()));
    }
    return handles.build();
}
Also used : Column(io.trino.plugin.hive.metastore.Column) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) TrinoException(io.trino.spi.TrinoException) HiveType(io.trino.plugin.hive.HiveType) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle)

Aggregations

HiveType (io.trino.plugin.hive.HiveType)15 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)8 ImmutableList (com.google.common.collect.ImmutableList)7 Column (io.trino.plugin.hive.metastore.Column)6 HiveColumnStatistics (io.trino.plugin.hive.metastore.HiveColumnStatistics)5 TrinoException (io.trino.spi.TrinoException)5 List (java.util.List)5 Map (java.util.Map)5 Optional (java.util.Optional)5 Type (io.trino.spi.type.Type)4 ImmutableMap (com.google.common.collect.ImmutableMap)3 ImmutableSet (com.google.common.collect.ImmutableSet)3 PartitionStatistics (io.trino.plugin.hive.PartitionStatistics)3 Set (java.util.Set)3 BinaryColumnStatisticsData (com.amazonaws.services.glue.model.BinaryColumnStatisticsData)2 BooleanColumnStatisticsData (com.amazonaws.services.glue.model.BooleanColumnStatisticsData)2 ColumnStatistics (com.amazonaws.services.glue.model.ColumnStatistics)2 ColumnStatisticsData (com.amazonaws.services.glue.model.ColumnStatisticsData)2 DateColumnStatisticsData (com.amazonaws.services.glue.model.DateColumnStatisticsData)2 DecimalColumnStatisticsData (com.amazonaws.services.glue.model.DecimalColumnStatisticsData)2