Search in sources :

Example 1 with TimestampLogicalTypeAnnotation

Use of org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation in the Apache Hive project.

From the class TestETypeConverter, method createInt64TimestampType.

/**
 * Builds an optional INT64 primitive type named {@code "value"} that is annotated
 * with a timestamp logical type.
 *
 * @param isAdjustedToUTC forwarded to {@code TimestampLogicalTypeAnnotation.timestampType}
 * @param unit            time unit forwarded to {@code TimestampLogicalTypeAnnotation.timestampType}
 * @return the annotated primitive type
 */
private PrimitiveType createInt64TimestampType(boolean isAdjustedToUTC, TimeUnit unit) {
    final TimestampLogicalTypeAnnotation timestampAnnotation =
        TimestampLogicalTypeAnnotation.timestampType(isAdjustedToUTC, unit);
    return Types.optional(PrimitiveTypeName.INT64)
        .as(timestampAnnotation)
        .named("value");
}
Also used : TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) PrimitiveType(org.apache.parquet.schema.PrimitiveType)

Example 2 with TimestampLogicalTypeAnnotation

Use of org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation in the Apache Hive project.

From the class ParquetDataColumnReaderFactory, method getDataColumnReaderByTypeHelper.

/**
 * Picks the {@link ParquetDataColumnReader} implementation for a column based on the
 * Parquet primitive type name and, where relevant, its logical type annotation.
 *
 * <p>Every reader is constructed from {@code dictionary} when {@code isDictionary} is
 * true and from {@code valuesReader} otherwise.
 *
 * @param isDictionary            selects the dictionary-backed constructor over the values-reader one
 * @param parquetType             Parquet primitive type whose type name and logical annotation drive the dispatch
 * @param hiveType                Hive type; for a list type its element type is used to look up decimal precision/scale
 * @param dictionary              dictionary handed to the reader when {@code isDictionary} is true
 * @param valuesReader            values reader handed to the reader when {@code isDictionary} is false
 * @param skipTimestampConversion when true, INT96 readers are created with {@code ZoneOffset.UTC} as target zone
 * @param writerTimezone          zone used for INT96 readers when conversion is not skipped; the JVM default
 *                                zone is passed to {@code firstNonNull} as the fallback
 * @param legacyConversionEnabled forwarded unchanged to the INT96 reader
 * @return the reader matching the Parquet type
 * @throws IOException declared by the signature; presumably raised by the helpers invoked here
 */
private static ParquetDataColumnReader getDataColumnReaderByTypeHelper(boolean isDictionary, PrimitiveType parquetType, TypeInfo hiveType, Dictionary dictionary, ValuesReader valuesReader, boolean skipTimestampConversion, ZoneId writerTimezone, boolean legacyConversionEnabled) throws IOException {
    // max length for varchar and char cases
    final int maxLength = getVarcharLength(hiveType);

    // For a list type, precision and scale are taken from the element type.
    final TypeInfo elementType;
    if (hiveType instanceof ListTypeInfo) {
        elementType = ((ListTypeInfo) hiveType).getListElementTypeInfo();
    } else {
        elementType = hiveType;
    }

    // Precision and scale stay 0 unless the Hive type is a decimal.
    final String baseTypeName = TypeInfoUtils.getBaseName(elementType.getTypeName());
    int hiveDecimalPrecision = 0;
    int hiveDecimalScale = 0;
    if (baseTypeName.equalsIgnoreCase(serdeConstants.DECIMAL_TYPE_NAME)) {
        final DecimalTypeInfo decimalInfo = (DecimalTypeInfo) elementType;
        hiveDecimalPrecision = decimalInfo.getPrecision();
        hiveDecimalScale = decimalInfo.getScale();
    }

    switch (parquetType.getPrimitiveTypeName()) {
        case INT32: {
            // Unsigned check comes first, then decimal annotation, then plain int32.
            if (ETypeConverter.isUnsignedInteger(parquetType)) {
                if (isDictionary) {
                    return new TypesFromUInt32PageReader(dictionary, maxLength, hiveDecimalPrecision, hiveDecimalScale);
                }
                return new TypesFromUInt32PageReader(valuesReader, maxLength, hiveDecimalPrecision, hiveDecimalScale);
            }
            if (parquetType.getLogicalTypeAnnotation() instanceof DecimalLogicalTypeAnnotation) {
                final DecimalLogicalTypeAnnotation int32Decimal = (DecimalLogicalTypeAnnotation) parquetType.getLogicalTypeAnnotation();
                final short parquetScale = (short) int32Decimal.getScale();
                if (isDictionary) {
                    return new TypesFromInt32DecimalPageReader(dictionary, maxLength, parquetScale, hiveDecimalPrecision, hiveDecimalScale);
                }
                return new TypesFromInt32DecimalPageReader(valuesReader, maxLength, parquetScale, hiveDecimalPrecision, hiveDecimalScale);
            }
            if (isDictionary) {
                return new TypesFromInt32PageReader(dictionary, maxLength, hiveDecimalPrecision, hiveDecimalScale);
            }
            return new TypesFromInt32PageReader(valuesReader, maxLength, hiveDecimalPrecision, hiveDecimalScale);
        }
        case INT64: {
            final LogicalTypeAnnotation annotation = parquetType.getLogicalTypeAnnotation();
            // Timestamp annotation is checked before the unsigned and decimal variants.
            if (annotation instanceof TimestampLogicalTypeAnnotation) {
                final TimestampLogicalTypeAnnotation timestampAnnotation = (TimestampLogicalTypeAnnotation) annotation;
                final boolean adjustedToUtc = timestampAnnotation.isAdjustedToUTC();
                final TimeUnit unit = timestampAnnotation.getUnit();
                if (isDictionary) {
                    return new TypesFromInt64PageReader(dictionary, maxLength, adjustedToUtc, unit);
                }
                return new TypesFromInt64PageReader(valuesReader, maxLength, adjustedToUtc, unit);
            }
            if (ETypeConverter.isUnsignedInteger(parquetType)) {
                if (isDictionary) {
                    return new TypesFromUInt64PageReader(dictionary, maxLength, hiveDecimalPrecision, hiveDecimalScale);
                }
                return new TypesFromUInt64PageReader(valuesReader, maxLength, hiveDecimalPrecision, hiveDecimalScale);
            }
            if (annotation instanceof DecimalLogicalTypeAnnotation) {
                final DecimalLogicalTypeAnnotation int64Decimal = (DecimalLogicalTypeAnnotation) annotation;
                final short parquetScale = (short) int64Decimal.getScale();
                if (isDictionary) {
                    return new TypesFromInt64DecimalPageReader(dictionary, maxLength, parquetScale, hiveDecimalPrecision, hiveDecimalScale);
                }
                return new TypesFromInt64DecimalPageReader(valuesReader, maxLength, parquetScale, hiveDecimalPrecision, hiveDecimalScale);
            }
            if (isDictionary) {
                return new TypesFromInt64PageReader(dictionary, maxLength, hiveDecimalPrecision, hiveDecimalScale);
            }
            return new TypesFromInt64PageReader(valuesReader, maxLength, hiveDecimalPrecision, hiveDecimalScale);
        }
        case FLOAT: {
            if (isDictionary) {
                return new TypesFromFloatPageReader(dictionary, maxLength, hiveDecimalPrecision, hiveDecimalScale);
            }
            return new TypesFromFloatPageReader(valuesReader, maxLength, hiveDecimalPrecision, hiveDecimalScale);
        }
        case INT96: {
            // UTC when conversion is skipped; otherwise the writer zone with the JVM default as fallback.
            final ZoneId conversionZone;
            if (skipTimestampConversion) {
                conversionZone = ZoneOffset.UTC;
            } else {
                conversionZone = firstNonNull(writerTimezone, TimeZone.getDefault().toZoneId());
            }
            if (isDictionary) {
                return new TypesFromInt96PageReader(dictionary, maxLength, conversionZone, legacyConversionEnabled);
            }
            return new TypesFromInt96PageReader(valuesReader, maxLength, conversionZone, legacyConversionEnabled);
        }
        case BOOLEAN: {
            if (isDictionary) {
                return new TypesFromBooleanPageReader(dictionary, maxLength);
            }
            return new TypesFromBooleanPageReader(valuesReader, maxLength);
        }
        case BINARY:
        case FIXED_LEN_BYTE_ARRAY: {
            // Both byte-array kinds are delegated to the binary converter lookup.
            return getConvertorFromBinary(isDictionary, parquetType, hiveType, valuesReader, dictionary);
        }
        case DOUBLE: {
            if (isDictionary) {
                return new TypesFromDoublePageReader(dictionary, maxLength, hiveDecimalPrecision, hiveDecimalScale);
            }
            return new TypesFromDoublePageReader(valuesReader, maxLength, hiveDecimalPrecision, hiveDecimalScale);
        }
        default: {
            if (isDictionary) {
                return new DefaultParquetDataColumnReader(dictionary, maxLength, hiveDecimalPrecision, hiveDecimalScale);
            }
            return new DefaultParquetDataColumnReader(valuesReader, maxLength, hiveDecimalPrecision, hiveDecimalScale);
        }
    }
}
Also used : DecimalLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) ZoneId(java.time.ZoneId) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) StringLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.StringLogicalTypeAnnotation) DecimalLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) LogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TimestampLogicalTypeAnnotation(org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation) TimeUnit(org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit)

Aggregations

TimestampLogicalTypeAnnotation (org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation)2 ZoneId (java.time.ZoneId)1 CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)1 DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo)1 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)1 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)1 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)1 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)1 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)1 LogicalTypeAnnotation (org.apache.parquet.schema.LogicalTypeAnnotation)1 DecimalLogicalTypeAnnotation (org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation)1 StringLogicalTypeAnnotation (org.apache.parquet.schema.LogicalTypeAnnotation.StringLogicalTypeAnnotation)1 TimeUnit (org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit)1 PrimitiveType (org.apache.parquet.schema.PrimitiveType)1