Search in sources:

Example 1 with HiveType

use of io.trino.plugin.hive.HiveType in project trino by trinodb.

The method toColumnStatistics of the class GlueStatConverter.

/**
 * Builds the Glue {@link ColumnStatistics} entry for a single column.
 *
 * @param column column whose name and Hive type are recorded on the result
 * @param statistics Hive-side statistics to translate
 * @param rowCount row count forwarded to {@code toGlueColumnStatisticsData}
 * @return a new Glue statistics object whose analyzed time is the moment of this call
 */
private static ColumnStatistics toColumnStatistics(Column column, HiveColumnStatistics statistics, OptionalLong rowCount) {
    HiveType hiveType = column.getType();
    ColumnStatistics result = new ColumnStatistics();
    result.setColumnName(column.getName());
    // The Hive type is stored in its string form.
    result.setColumnType(hiveType.toString());
    result.setStatisticsData(toGlueColumnStatisticsData(statistics, hiveType, rowCount));
    result.setAnalyzedTime(new Date());
    return result;
}
Also used : HiveColumnStatistics.createStringColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createStringColumnStatistics) HiveColumnStatistics.createBinaryColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBinaryColumnStatistics) HiveColumnStatistics.createDecimalColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDecimalColumnStatistics) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) HiveColumnStatistics.createDateColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDateColumnStatistics) ColumnStatistics(com.amazonaws.services.glue.model.ColumnStatistics) HiveColumnStatistics.createIntegerColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics) HiveColumnStatistics.createDoubleColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDoubleColumnStatistics) HiveColumnStatistics.createBooleanColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBooleanColumnStatistics) HiveType(io.trino.plugin.hive.HiveType) ColumnStatisticsData(com.amazonaws.services.glue.model.ColumnStatisticsData) BinaryColumnStatisticsData(com.amazonaws.services.glue.model.BinaryColumnStatisticsData) DoubleColumnStatisticsData(com.amazonaws.services.glue.model.DoubleColumnStatisticsData) StringColumnStatisticsData(com.amazonaws.services.glue.model.StringColumnStatisticsData) DecimalColumnStatisticsData(com.amazonaws.services.glue.model.DecimalColumnStatisticsData) DateColumnStatisticsData(com.amazonaws.services.glue.model.DateColumnStatisticsData) LongColumnStatisticsData(com.amazonaws.services.glue.model.LongColumnStatisticsData) BooleanColumnStatisticsData(com.amazonaws.services.glue.model.BooleanColumnStatisticsData) Date(java.util.Date) LocalDate(java.time.LocalDate)

Example 2 with HiveType

use of io.trino.plugin.hive.HiveType in project trino by trinodb.

The method toGlueColumnStatisticsData of the class GlueStatConverter.

/**
 * Translates Hive column statistics into the Glue {@link ColumnStatisticsData} variant that
 * matches the column's primitive category.
 *
 * <p>Category mapping used below: BOOLEAN, BINARY, DATE and DECIMAL map to their own Glue types;
 * FLOAT and DOUBLE are both written as DOUBLE; BYTE, SHORT, INT, LONG and TIMESTAMP are written
 * as LONG; VARCHAR, CHAR and STRING are written as STRING.
 *
 * @param statistics Hive-side statistics to translate
 * @param columnType Hive type of the column; must be a primitive type
 * @param rowCount row count, used only when computing the average length for binary and string columns
 * @return a new Glue statistics data object with its type tag and the matching typed payload set
 * @throws TrinoException with {@code HIVE_INVALID_METADATA} when the primitive category is not handled
 */
private static ColumnStatisticsData toGlueColumnStatisticsData(HiveColumnStatistics statistics, HiveType columnType, OptionalLong rowCount) {
    TypeInfo typeInfo = columnType.getTypeInfo();
    // Non-primitive types are rejected by the precondition before anything is built.
    checkArgument(typeInfo.getCategory() == PRIMITIVE, "Unsupported statistics type: %s", columnType);
    ColumnStatisticsData result = new ColumnStatisticsData();
    PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) typeInfo;
    switch (primitiveType.getPrimitiveCategory()) {
        case BOOLEAN:
            BooleanColumnStatisticsData booleanData = new BooleanColumnStatisticsData();
            statistics.getBooleanStatistics().ifPresent(counts -> {
                counts.getTrueCount().ifPresent(booleanData::setNumberOfTrues);
                counts.getFalseCount().ifPresent(booleanData::setNumberOfFalses);
            });
            statistics.getNullsCount().ifPresent(booleanData::setNumberOfNulls);
            result.setBooleanColumnStatisticsData(booleanData);
            result.setType(ColumnStatisticsType.BOOLEAN.toString());
            break;

        case BINARY:
            BinaryColumnStatisticsData binaryData = new BinaryColumnStatisticsData();
            // Lengths fall back to 0 when the corresponding statistic is absent.
            binaryData.setAverageLength(getAverageColumnLength(statistics.getTotalSizeInBytes(), rowCount, statistics.getNullsCount()).orElse(0));
            binaryData.setMaximumLength(statistics.getMaxValueSizeInBytes().orElse(0));
            statistics.getNullsCount().ifPresent(binaryData::setNumberOfNulls);
            result.setBinaryColumnStatisticsData(binaryData);
            result.setType(ColumnStatisticsType.BINARY.toString());
            break;

        case DATE:
            DateColumnStatisticsData dateData = new DateColumnStatisticsData();
            statistics.getNullsCount().ifPresent(dateData::setNumberOfNulls);
            toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(dateData::setNumberOfDistinctValues);
            statistics.getDateStatistics().ifPresent(range -> {
                range.getMin().ifPresent(min -> dateData.setMinimumValue(localDateToDate(min)));
                range.getMax().ifPresent(max -> dateData.setMaximumValue(localDateToDate(max)));
            });
            result.setDateColumnStatisticsData(dateData);
            result.setType(ColumnStatisticsType.DATE.toString());
            break;

        case DECIMAL:
            DecimalColumnStatisticsData decimalData = new DecimalColumnStatisticsData();
            statistics.getNullsCount().ifPresent(decimalData::setNumberOfNulls);
            toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(decimalData::setNumberOfDistinctValues);
            statistics.getDecimalStatistics().ifPresent(range -> {
                range.getMin().ifPresent(min -> decimalData.setMinimumValue(bigDecimalToGlueDecimal(min)));
                range.getMax().ifPresent(max -> decimalData.setMaximumValue(bigDecimalToGlueDecimal(max)));
            });
            result.setDecimalColumnStatisticsData(decimalData);
            result.setType(ColumnStatisticsType.DECIMAL.toString());
            break;

        case FLOAT:
        case DOUBLE:
            DoubleColumnStatisticsData doubleData = new DoubleColumnStatisticsData();
            statistics.getNullsCount().ifPresent(doubleData::setNumberOfNulls);
            toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(doubleData::setNumberOfDistinctValues);
            statistics.getDoubleStatistics().ifPresent(range -> {
                range.getMin().ifPresent(doubleData::setMinimumValue);
                range.getMax().ifPresent(doubleData::setMaximumValue);
            });
            result.setDoubleColumnStatisticsData(doubleData);
            result.setType(ColumnStatisticsType.DOUBLE.toString());
            break;

        case BYTE:
        case SHORT:
        case INT:
        case LONG:
        case TIMESTAMP:
            LongColumnStatisticsData longData = new LongColumnStatisticsData();
            statistics.getNullsCount().ifPresent(longData::setNumberOfNulls);
            toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(longData::setNumberOfDistinctValues);
            statistics.getIntegerStatistics().ifPresent(range -> {
                range.getMin().ifPresent(longData::setMinimumValue);
                range.getMax().ifPresent(longData::setMaximumValue);
            });
            result.setLongColumnStatisticsData(longData);
            result.setType(ColumnStatisticsType.LONG.toString());
            break;

        case VARCHAR:
        case CHAR:
        case STRING:
            StringColumnStatisticsData stringData = new StringColumnStatisticsData();
            // Lengths fall back to 0 when the corresponding statistic is absent.
            stringData.setAverageLength(getAverageColumnLength(statistics.getTotalSizeInBytes(), rowCount, statistics.getNullsCount()).orElse(0));
            stringData.setMaximumLength(statistics.getMaxValueSizeInBytes().orElse(0));
            statistics.getNullsCount().ifPresent(stringData::setNumberOfNulls);
            toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(stringData::setNumberOfDistinctValues);
            result.setStringColumnStatisticsData(stringData);
            result.setType(ColumnStatisticsType.STRING.toString());
            break;

        default:
            throw new TrinoException(HIVE_INVALID_METADATA, "Invalid column statistics type: " + primitiveType.getPrimitiveCategory());
    }
    return result;
}
Also used : HiveColumnStatistics.createStringColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createStringColumnStatistics) Date(java.util.Date) OptionalDouble(java.util.OptionalDouble) HiveColumnStatistics.createBinaryColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBinaryColumnStatistics) HiveColumnStatistics.createDecimalColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDecimalColumnStatistics) ByteBuffer(java.nio.ByteBuffer) HiveType(io.trino.plugin.hive.HiveType) BigDecimal(java.math.BigDecimal) OptionalLong(java.util.OptionalLong) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) ColumnStatisticsData(com.amazonaws.services.glue.model.ColumnStatisticsData) HiveColumnStatistics.createDateColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDateColumnStatistics) ThriftMetastoreUtil.getTotalSizeInBytes(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getTotalSizeInBytes) ThriftMetastoreUtil.toMetastoreDistinctValuesCount(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreDistinctValuesCount) Column(io.trino.plugin.hive.metastore.Column) BinaryColumnStatisticsData(com.amazonaws.services.glue.model.BinaryColumnStatisticsData) Map(java.util.Map) DoubleColumnStatisticsData(com.amazonaws.services.glue.model.DoubleColumnStatisticsData) BigInteger(java.math.BigInteger) StringColumnStatisticsData(com.amazonaws.services.glue.model.StringColumnStatisticsData) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ThriftMetastoreUtil.fromMetastoreDistinctValuesCount(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreDistinctValuesCount) DecimalNumber(com.amazonaws.services.glue.model.DecimalNumber) 
DecimalColumnStatisticsData(com.amazonaws.services.glue.model.DecimalColumnStatisticsData) Table(io.trino.plugin.hive.metastore.Table) ColumnStatistics(com.amazonaws.services.glue.model.ColumnStatistics) HiveColumnStatistics.createIntegerColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Decimal(org.apache.hadoop.hive.metastore.api.Decimal) TrinoException(io.trino.spi.TrinoException) ColumnStatisticsType(com.amazonaws.services.glue.model.ColumnStatisticsType) DateColumnStatisticsData(com.amazonaws.services.glue.model.DateColumnStatisticsData) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) TimeUnit(java.util.concurrent.TimeUnit) HiveColumnStatistics.createDoubleColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDoubleColumnStatistics) LongColumnStatisticsData(com.amazonaws.services.glue.model.LongColumnStatisticsData) List(java.util.List) ThriftMetastoreUtil.getAverageColumnLength(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getAverageColumnLength) BooleanColumnStatisticsData(com.amazonaws.services.glue.model.BooleanColumnStatisticsData) HiveColumnStatistics.createBooleanColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBooleanColumnStatistics) LocalDate(java.time.LocalDate) Optional(java.util.Optional) ThriftMetastoreUtil.fromMetastoreNullsCount(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreNullsCount) Partition(io.trino.plugin.hive.metastore.Partition) DateColumnStatisticsData(com.amazonaws.services.glue.model.DateColumnStatisticsData) DecimalColumnStatisticsData(com.amazonaws.services.glue.model.DecimalColumnStatisticsData) StringColumnStatisticsData(com.amazonaws.services.glue.model.StringColumnStatisticsData) 
BooleanColumnStatisticsData(com.amazonaws.services.glue.model.BooleanColumnStatisticsData) BinaryColumnStatisticsData(com.amazonaws.services.glue.model.BinaryColumnStatisticsData) TrinoException(io.trino.spi.TrinoException) LongColumnStatisticsData(com.amazonaws.services.glue.model.LongColumnStatisticsData) DoubleColumnStatisticsData(com.amazonaws.services.glue.model.DoubleColumnStatisticsData) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ColumnStatisticsData(com.amazonaws.services.glue.model.ColumnStatisticsData) BinaryColumnStatisticsData(com.amazonaws.services.glue.model.BinaryColumnStatisticsData) DoubleColumnStatisticsData(com.amazonaws.services.glue.model.DoubleColumnStatisticsData) StringColumnStatisticsData(com.amazonaws.services.glue.model.StringColumnStatisticsData) DecimalColumnStatisticsData(com.amazonaws.services.glue.model.DecimalColumnStatisticsData) DateColumnStatisticsData(com.amazonaws.services.glue.model.DateColumnStatisticsData) LongColumnStatisticsData(com.amazonaws.services.glue.model.LongColumnStatisticsData) BooleanColumnStatisticsData(com.amazonaws.services.glue.model.BooleanColumnStatisticsData) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 3 with HiveType

use of io.trino.plugin.hive.HiveType in project trino by trinodb.

The method canCoerceForMap of the class HiveCoercionPolicy.

/**
 * Decides whether one map type can be coerced to another.
 *
 * <p>Both arguments must have category {@code MAP}. The key types must then be equal or
 * coercible via {@code canCoerce}, and likewise the value types.
 *
 * @param fromHiveType source type
 * @param toHiveType target type
 * @return {@code true} when both are maps and both the key and the value types are compatible
 */
private boolean canCoerceForMap(HiveType fromHiveType, HiveType toHiveType) {
    boolean bothAreMaps = fromHiveType.getCategory() == Category.MAP && toHiveType.getCategory() == Category.MAP;
    if (!bothAreMaps) {
        return false;
    }

    MapTypeInfo sourceMap = (MapTypeInfo) fromHiveType.getTypeInfo();
    HiveType sourceKey = HiveType.valueOf(sourceMap.getMapKeyTypeInfo().getTypeName());
    HiveType sourceValue = HiveType.valueOf(sourceMap.getMapValueTypeInfo().getTypeName());

    MapTypeInfo targetMap = (MapTypeInfo) toHiveType.getTypeInfo();
    HiveType targetKey = HiveType.valueOf(targetMap.getMapKeyTypeInfo().getTypeName());
    HiveType targetValue = HiveType.valueOf(targetMap.getMapValueTypeInfo().getTypeName());

    // Keys are checked first; values are only examined when the keys are compatible.
    if (!sourceKey.equals(targetKey) && !canCoerce(sourceKey, targetKey)) {
        return false;
    }
    return sourceValue.equals(targetValue) || canCoerce(sourceValue, targetValue);
}
Also used : MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) HiveType(io.trino.plugin.hive.HiveType)

Example 4 with HiveType

use of io.trino.plugin.hive.HiveType in project trino by trinodb.

The method canCoerceForList of the class HiveCoercionPolicy.

/**
 * Decides whether one list type can be coerced to another.
 *
 * <p>Both arguments must have category {@code LIST}, and their element types must be equal or
 * coercible via {@code canCoerce}.
 *
 * @param fromHiveType source type
 * @param toHiveType target type
 * @return {@code true} when both are lists with compatible element types
 */
private boolean canCoerceForList(HiveType fromHiveType, HiveType toHiveType) {
    boolean bothAreLists = fromHiveType.getCategory() == Category.LIST && toHiveType.getCategory() == Category.LIST;
    if (!bothAreLists) {
        return false;
    }

    ListTypeInfo sourceList = (ListTypeInfo) fromHiveType.getTypeInfo();
    HiveType sourceElement = HiveType.valueOf(sourceList.getListElementTypeInfo().getTypeName());

    ListTypeInfo targetList = (ListTypeInfo) toHiveType.getTypeInfo();
    HiveType targetElement = HiveType.valueOf(targetList.getListElementTypeInfo().getTypeName());

    if (sourceElement.equals(targetElement)) {
        return true;
    }
    return canCoerce(sourceElement, targetElement);
}
Also used : ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) HiveType(io.trino.plugin.hive.HiveType)

Example 5 with HiveType

use of io.trino.plugin.hive.HiveType in project trino by trinodb.

The method getRegularColumnHandles of the class HiveUtil.

/**
 * Creates {@code REGULAR} column handles for the data columns of a table.
 *
 * <p>Columns whose Hive type is not supported for the table's storage format are left out of the
 * result, but they still occupy an index: each handle carries the column's position among all
 * data columns, not among the returned ones.
 *
 * @param table table whose data columns are inspected
 * @param typeManager passed to {@code HiveType.getType} to resolve each column's type
 * @param timestampPrecision passed to {@code HiveType.getType} alongside the type manager
 * @return handles for the supported data columns, in table order
 */
public static List<HiveColumnHandle> getRegularColumnHandles(Table table, TypeManager typeManager, HiveTimestampPrecision timestampPrecision) {
    ImmutableList.Builder<HiveColumnHandle> handles = ImmutableList.builder();
    int nextIndex = 0;
    for (Column dataColumn : table.getDataColumns()) {
        // Claim the index up front so skipped columns still advance the position.
        int columnIndex = nextIndex++;
        HiveType columnType = dataColumn.getType();
        if (!columnType.isSupportedType(table.getStorage().getStorageFormat())) {
            // ignore unsupported types rather than failing
            continue;
        }
        handles.add(createBaseColumn(
                dataColumn.getName(),
                columnIndex,
                columnType,
                columnType.getType(typeManager, timestampPrecision),
                REGULAR,
                dataColumn.getComment()));
    }
    return handles.build();
}
Also used : Column(io.trino.plugin.hive.metastore.Column) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) HiveType(io.trino.plugin.hive.HiveType) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle)

Aggregations

HiveType (io.trino.plugin.hive.HiveType)15 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)8 ImmutableList (com.google.common.collect.ImmutableList)7 Column (io.trino.plugin.hive.metastore.Column)6 HiveColumnStatistics (io.trino.plugin.hive.metastore.HiveColumnStatistics)5 TrinoException (io.trino.spi.TrinoException)5 List (java.util.List)5 Map (java.util.Map)5 Optional (java.util.Optional)5 Type (io.trino.spi.type.Type)4 ImmutableMap (com.google.common.collect.ImmutableMap)3 ImmutableSet (com.google.common.collect.ImmutableSet)3 PartitionStatistics (io.trino.plugin.hive.PartitionStatistics)3 Set (java.util.Set)3 BinaryColumnStatisticsData (com.amazonaws.services.glue.model.BinaryColumnStatisticsData)2 BooleanColumnStatisticsData (com.amazonaws.services.glue.model.BooleanColumnStatisticsData)2 ColumnStatistics (com.amazonaws.services.glue.model.ColumnStatistics)2 ColumnStatisticsData (com.amazonaws.services.glue.model.ColumnStatisticsData)2 DateColumnStatisticsData (com.amazonaws.services.glue.model.DateColumnStatisticsData)2 DecimalColumnStatisticsData (com.amazonaws.services.glue.model.DecimalColumnStatisticsData)2