Search in sources :

Example 1 with DecimalMetadata

use of org.apache.parquet.schema.DecimalMetadata in project drill by axbaretto.

the class DrillParquetGroupConverter method getConverterForType.

/**
 * Builds the {@link PrimitiveConverter} used to read one Parquet primitive column.
 *
 * <p>The converter is selected from the column's physical type
 * ({@code type.getPrimitiveTypeName()}) and, where present, its annotation
 * ({@code type.getOriginalType()}). In every branch the target writer is obtained from
 * {@code mapWriter.list(name)} when the column's repetition is {@code REPEATED}, and
 * directly from {@code mapWriter} under {@code name} otherwise.
 *
 * <p>All DECIMAL branches first call
 * {@code ParquetReaderUtility.checkDecimalTypeEnabled(options)}.
 *
 * @param name column name used to look up / create the writer in {@code mapWriter}
 * @param type Parquet primitive type describing the column
 * @return a converter bound to the writer for this column
 * @throws UnsupportedOperationException if the physical type, or the combination of
 *         physical type and annotation, is not handled here
 * @throws DrillRuntimeException if a DATE column is encountered and
 *         {@code containsCorruptedDates} holds an unrecognized status
 */
@SuppressWarnings("resource")
private PrimitiveConverter getConverterForType(String name, PrimitiveType type) {
    switch(type.getPrimitiveTypeName()) {
        case INT32:
            {
                // No annotation: plain 32-bit integer.
                if (type.getOriginalType() == null) {
                    IntWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).integer() : mapWriter.integer(name);
                    return new DrillIntConverter(writer);
                }
                switch(type.getOriginalType()) {
                    // All integer annotations stored in INT32 share the same int writer/converter.
                    case UINT_8:
                    case UINT_16:
                    case UINT_32:
                    case INT_8:
                    case INT_16:
                    case INT_32:
                        {
                            IntWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).integer() : mapWriter.integer(name);
                            return new DrillIntConverter(writer);
                        }
                    case DECIMAL:
                        {
                            ParquetReaderUtility.checkDecimalTypeEnabled(options);
                            Decimal9Writer writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).decimal9() : mapWriter.decimal9(name, type.getDecimalMetadata().getScale(), type.getDecimalMetadata().getPrecision());
                            return new DrillDecimal9Converter(writer, type.getDecimalMetadata().getPrecision(), type.getDecimalMetadata().getScale());
                        }
                    case DATE:
                        {
                            DateWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).date() : mapWriter.date(name);
                            // The date converter depends on the corruption status recorded in containsCorruptedDates.
                            switch(containsCorruptedDates) {
                                case META_SHOWS_CORRUPTION:
                                    return new DrillCorruptedDateConverter(writer);
                                case META_SHOWS_NO_CORRUPTION:
                                    return new DrillDateConverter(writer);
                                case META_UNCLEAR_TEST_VALUES:
                                    return new CorruptionDetectingDateConverter(writer);
                                default:
                                    throw new DrillRuntimeException(String.format("Issue setting up parquet reader for date type, unrecognized date corruption status %s. See DRILL-4203 for more info.", containsCorruptedDates));
                            }
                        }
                    case TIME_MILLIS:
                        {
                            TimeWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).time() : mapWriter.time(name);
                            return new DrillTimeConverter(writer);
                        }
                    default:
                        {
                            throw new UnsupportedOperationException("Unsupported type: " + type.getOriginalType());
                        }
                }
            }
        case INT64:
            {
                // No annotation: plain 64-bit integer.
                if (type.getOriginalType() == null) {
                    BigIntWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).bigInt() : mapWriter.bigInt(name);
                    return new DrillBigIntConverter(writer);
                }
                switch(type.getOriginalType()) {
                    case UINT_64:
                    case INT_64:
                        {
                            BigIntWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).bigInt() : mapWriter.bigInt(name);
                            return new DrillBigIntConverter(writer);
                        }
                    case DECIMAL:
                        {
                            ParquetReaderUtility.checkDecimalTypeEnabled(options);
                            Decimal18Writer writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).decimal18() : mapWriter.decimal18(name, type.getDecimalMetadata().getScale(), type.getDecimalMetadata().getPrecision());
                            return new DrillDecimal18Converter(writer, type.getDecimalMetadata().getPrecision(), type.getDecimalMetadata().getScale());
                        }
                    case TIMESTAMP_MILLIS:
                        {
                            TimeStampWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).timeStamp() : mapWriter.timeStamp(name);
                            return new DrillTimeStampConverter(writer);
                        }
                    default:
                        {
                            throw new UnsupportedOperationException("Unsupported type " + type.getOriginalType());
                        }
                }
            }
        case INT96:
            {
                // TODO: replace null with TIMESTAMP_NANOS once parquet support such type annotation.
                if (type.getOriginalType() == null) {
                    // The session option decides whether un-annotated INT96 is read as a timestamp or as raw bytes.
                    if (options.getOption(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP).bool_val) {
                        TimeStampWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).timeStamp() : mapWriter.timeStamp(name);
                        return new DrillFixedBinaryToTimeStampConverter(writer);
                    } else {
                        VarBinaryWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).varBinary() : mapWriter.varBinary(name);
                        return new DrillFixedBinaryToVarbinaryConverter(writer, ParquetColumnMetadata.getTypeLengthInBits(type.getPrimitiveTypeName()) / 8, mutator.getManagedBuffer());
                    }
                }
                // An annotated INT96 column has no handler. Without this throw, control would
                // fall through into the FLOAT case and bind a float converter to INT96 data.
                throw new UnsupportedOperationException("Unsupported type " + type.getOriginalType());
            }
        case FLOAT:
            {
                Float4Writer writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).float4() : mapWriter.float4(name);
                return new DrillFloat4Converter(writer);
            }
        case DOUBLE:
            {
                Float8Writer writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).float8() : mapWriter.float8(name);
                return new DrillFloat8Converter(writer);
            }
        case BOOLEAN:
            {
                BitWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).bit() : mapWriter.bit(name);
                return new DrillBoolConverter(writer);
            }
        case BINARY:
            {
                // No annotation: opaque variable-length bytes.
                if (type.getOriginalType() == null) {
                    VarBinaryWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).varBinary() : mapWriter.varBinary(name);
                    return new DrillVarBinaryConverter(writer, mutator.getManagedBuffer());
                }
                switch(type.getOriginalType()) {
                    // UTF8 and ENUM are both written through the VarChar writer.
                    case UTF8:
                    case ENUM:
                        {
                            VarCharWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).varChar() : mapWriter.varChar(name);
                            return new DrillVarCharConverter(writer, mutator.getManagedBuffer());
                        }
                    // TODO not sure if BINARY/DECIMAL is actually supported
                    case DECIMAL:
                        {
                            ParquetReaderUtility.checkDecimalTypeEnabled(options);
                            DecimalMetadata metadata = type.getDecimalMetadata();
                            // Precision up to 28 uses the Decimal28Sparse writer; anything larger uses Decimal38Sparse.
                            if (metadata.getPrecision() <= 28) {
                                Decimal28SparseWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).decimal28Sparse() : mapWriter.decimal28Sparse(name, metadata.getScale(), metadata.getPrecision());
                                return new DrillBinaryToDecimal28Converter(writer, metadata.getPrecision(), metadata.getScale(), mutator.getManagedBuffer());
                            } else {
                                Decimal38SparseWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).decimal38Sparse() : mapWriter.decimal38Sparse(name, metadata.getScale(), metadata.getPrecision());
                                return new DrillBinaryToDecimal38Converter(writer, metadata.getPrecision(), metadata.getScale(), mutator.getManagedBuffer());
                            }
                        }
                    default:
                        {
                            throw new UnsupportedOperationException("Unsupported type " + type.getOriginalType());
                        }
                }
            }
        case FIXED_LEN_BYTE_ARRAY:
            if (type.getOriginalType() == OriginalType.DECIMAL) {
                ParquetReaderUtility.checkDecimalTypeEnabled(options);
                DecimalMetadata metadata = type.getDecimalMetadata();
                // Same precision split as the BINARY/DECIMAL branch.
                if (metadata.getPrecision() <= 28) {
                    Decimal28SparseWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).decimal28Sparse() : mapWriter.decimal28Sparse(name, metadata.getScale(), metadata.getPrecision());
                    return new DrillBinaryToDecimal28Converter(writer, metadata.getPrecision(), metadata.getScale(), mutator.getManagedBuffer());
                } else {
                    Decimal38SparseWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).decimal38Sparse() : mapWriter.decimal38Sparse(name, metadata.getScale(), metadata.getPrecision());
                    return new DrillBinaryToDecimal38Converter(writer, metadata.getPrecision(), metadata.getScale(), mutator.getManagedBuffer());
                }
            } else if (type.getOriginalType() == OriginalType.INTERVAL) {
                IntervalWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).interval() : mapWriter.interval(name);
                return new DrillFixedLengthByteArrayToInterval(writer);
            } else {
                // Any other (or no) annotation: expose the fixed-length bytes as VarBinary.
                VarBinaryWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).varBinary() : mapWriter.varBinary(name);
                return new DrillFixedBinaryToVarbinaryConverter(writer, type.getTypeLength(), mutator.getManagedBuffer());
            }
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type.getPrimitiveTypeName());
    }
}
Also used : BitWriter(org.apache.drill.exec.vector.complex.writer.BitWriter) Decimal9Writer(org.apache.drill.exec.vector.complex.writer.Decimal9Writer) Float4Writer(org.apache.drill.exec.vector.complex.writer.Float4Writer) TimeWriter(org.apache.drill.exec.vector.complex.writer.TimeWriter) VarBinaryWriter(org.apache.drill.exec.vector.complex.writer.VarBinaryWriter) TimeStampWriter(org.apache.drill.exec.vector.complex.writer.TimeStampWriter) DateWriter(org.apache.drill.exec.vector.complex.writer.DateWriter) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException) IntervalWriter(org.apache.drill.exec.vector.complex.writer.IntervalWriter) Decimal18Writer(org.apache.drill.exec.vector.complex.writer.Decimal18Writer) BigIntWriter(org.apache.drill.exec.vector.complex.writer.BigIntWriter) IntWriter(org.apache.drill.exec.vector.complex.writer.IntWriter) VarCharWriter(org.apache.drill.exec.vector.complex.writer.VarCharWriter) Float8Writer(org.apache.drill.exec.vector.complex.writer.Float8Writer) BigIntWriter(org.apache.drill.exec.vector.complex.writer.BigIntWriter) DecimalMetadata(org.apache.parquet.schema.DecimalMetadata) Decimal28SparseWriter(org.apache.drill.exec.vector.complex.writer.Decimal28SparseWriter) Decimal38SparseWriter(org.apache.drill.exec.vector.complex.writer.Decimal38SparseWriter)

Example 2 with DecimalMetadata

use of org.apache.parquet.schema.DecimalMetadata in project drill by axbaretto.

the class ParquetRecordWriter method getPrimitiveType.

/**
 * Derives the Parquet {@link PrimitiveType} for a Drill field.
 *
 * <p>Every component of the resulting type (physical type name, repetition, annotation,
 * decimal metadata and length) is looked up through {@code ParquetTypeHelper} from the
 * field's minor type, data mode, or the field itself. The type id is passed as {@code null}.
 *
 * @param field Drill field to describe
 * @return the Parquet primitive type built from the looked-up components
 */
private PrimitiveType getPrimitiveType(MaterializedField field) {
    final MinorType drillType = field.getType().getMinorType();
    final String columnName = field.getName();
    final PrimitiveTypeName physicalType = ParquetTypeHelper.getPrimitiveTypeNameForMinorType(drillType);
    final Repetition columnRepetition = ParquetTypeHelper.getRepetitionForDataMode(field.getDataMode());
    final OriginalType annotation = ParquetTypeHelper.getOriginalTypeForMinorType(drillType);
    final DecimalMetadata decimalInfo = ParquetTypeHelper.getDecimalMetadataForField(field);
    final int typeLength = ParquetTypeHelper.getLengthForMinorType(drillType);
    return new PrimitiveType(
        columnRepetition,
        physicalType,
        typeLength,
        columnName,
        annotation,
        decimalInfo,
        null);
}
Also used : OriginalType(org.apache.parquet.schema.OriginalType) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) PrimitiveType(org.apache.parquet.schema.PrimitiveType) DecimalMetadata(org.apache.parquet.schema.DecimalMetadata) Repetition(org.apache.parquet.schema.Type.Repetition) PrimitiveTypeName(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName)

Example 3 with DecimalMetadata

use of org.apache.parquet.schema.DecimalMetadata in project drill by apache.

the class ParquetRecordWriter method getPrimitiveType.

/**
 * Derives the Parquet {@link PrimitiveType} for a Drill field.
 *
 * <p>For non-decimal fields the physical type and length come from
 * {@code ParquetTypeHelper}. For decimal fields the physical type starts as
 * {@code logicalTypeForDecimals}; when {@code usePrimitiveTypesForDecimals} is set it is
 * narrowed to INT32 or INT64 if the field's precision does not exceed the maximum reported
 * for that primitive type. The length of a decimal field is always recomputed from its
 * precision via {@code DecimalUtility.getMaxBytesSizeForPrecision}.
 *
 * @param field Drill field to describe
 * @return the Parquet primitive type for the field; the type id is passed as {@code null}
 */
protected PrimitiveType getPrimitiveType(MaterializedField field) {
    final MinorType drillType = field.getType().getMinorType();
    final String columnName = field.getName();
    int typeLength = ParquetTypeHelper.getLengthForMinorType(drillType);
    PrimitiveTypeName physicalType = ParquetTypeHelper.getPrimitiveTypeNameForMinorType(drillType);

    if (Types.isDecimalType(drillType)) {
        // Default decimal storage; possibly narrowed to an integer type below.
        physicalType = logicalTypeForDecimals;
        if (usePrimitiveTypesForDecimals
                && field.getPrecision() <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT32)) {
            physicalType = PrimitiveTypeName.INT32;
        } else if (usePrimitiveTypesForDecimals
                && field.getPrecision() <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT64)) {
            physicalType = PrimitiveTypeName.INT64;
        }
        typeLength = DecimalUtility.getMaxBytesSizeForPrecision(field.getPrecision());
    }

    final Repetition columnRepetition = ParquetTypeHelper.getRepetitionForDataMode(field.getDataMode());
    final OriginalType annotation = ParquetTypeHelper.getOriginalTypeForMinorType(drillType);
    final DecimalMetadata decimalInfo = ParquetTypeHelper.getDecimalMetadataForField(field);
    return new PrimitiveType(
        columnRepetition,
        physicalType,
        typeLength,
        columnName,
        annotation,
        decimalInfo,
        null);
}
Also used : OriginalType(org.apache.parquet.schema.OriginalType) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) PrimitiveType(org.apache.parquet.schema.PrimitiveType) DecimalMetadata(org.apache.parquet.schema.DecimalMetadata) Repetition(org.apache.parquet.schema.Type.Repetition) PrimitiveTypeName(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName)

Aggregations

DecimalMetadata (org.apache.parquet.schema.DecimalMetadata)3 MinorType (org.apache.drill.common.types.TypeProtos.MinorType)2 OriginalType (org.apache.parquet.schema.OriginalType)2 PrimitiveType (org.apache.parquet.schema.PrimitiveType)2 PrimitiveTypeName (org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName)2 Repetition (org.apache.parquet.schema.Type.Repetition)2 DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException)1 BigIntWriter (org.apache.drill.exec.vector.complex.writer.BigIntWriter)1 BitWriter (org.apache.drill.exec.vector.complex.writer.BitWriter)1 DateWriter (org.apache.drill.exec.vector.complex.writer.DateWriter)1 Decimal18Writer (org.apache.drill.exec.vector.complex.writer.Decimal18Writer)1 Decimal28SparseWriter (org.apache.drill.exec.vector.complex.writer.Decimal28SparseWriter)1 Decimal38SparseWriter (org.apache.drill.exec.vector.complex.writer.Decimal38SparseWriter)1 Decimal9Writer (org.apache.drill.exec.vector.complex.writer.Decimal9Writer)1 Float4Writer (org.apache.drill.exec.vector.complex.writer.Float4Writer)1 Float8Writer (org.apache.drill.exec.vector.complex.writer.Float8Writer)1 IntWriter (org.apache.drill.exec.vector.complex.writer.IntWriter)1 IntervalWriter (org.apache.drill.exec.vector.complex.writer.IntervalWriter)1 TimeStampWriter (org.apache.drill.exec.vector.complex.writer.TimeStampWriter)1 TimeWriter (org.apache.drill.exec.vector.complex.writer.TimeWriter)1