use of org.apache.parquet.schema.DecimalMetadata in project drill by axbaretto.
the class DrillParquetGroupConverter method getConverterForType.
@SuppressWarnings("resource")
private PrimitiveConverter getConverterForType(String name, PrimitiveType type) {
switch(type.getPrimitiveTypeName()) {
case INT32:
{
if (type.getOriginalType() == null) {
IntWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).integer() : mapWriter.integer(name);
return new DrillIntConverter(writer);
}
switch(type.getOriginalType()) {
case UINT_8:
case UINT_16:
case UINT_32:
case INT_8:
case INT_16:
case INT_32:
{
IntWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).integer() : mapWriter.integer(name);
return new DrillIntConverter(writer);
}
case DECIMAL:
{
ParquetReaderUtility.checkDecimalTypeEnabled(options);
Decimal9Writer writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).decimal9() : mapWriter.decimal9(name, type.getDecimalMetadata().getScale(), type.getDecimalMetadata().getPrecision());
return new DrillDecimal9Converter(writer, type.getDecimalMetadata().getPrecision(), type.getDecimalMetadata().getScale());
}
case DATE:
{
DateWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).date() : mapWriter.date(name);
switch(containsCorruptedDates) {
case META_SHOWS_CORRUPTION:
return new DrillCorruptedDateConverter(writer);
case META_SHOWS_NO_CORRUPTION:
return new DrillDateConverter(writer);
case META_UNCLEAR_TEST_VALUES:
return new CorruptionDetectingDateConverter(writer);
default:
throw new DrillRuntimeException(String.format("Issue setting up parquet reader for date type, " + "unrecognized date corruption status %s. See DRILL-4203 for more info.", containsCorruptedDates));
}
}
case TIME_MILLIS:
{
TimeWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).time() : mapWriter.time(name);
return new DrillTimeConverter(writer);
}
default:
{
throw new UnsupportedOperationException("Unsupported type: " + type.getOriginalType());
}
}
}
case INT64:
{
if (type.getOriginalType() == null) {
BigIntWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).bigInt() : mapWriter.bigInt(name);
return new DrillBigIntConverter(writer);
}
switch(type.getOriginalType()) {
case UINT_64:
case INT_64:
{
BigIntWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).bigInt() : mapWriter.bigInt(name);
return new DrillBigIntConverter(writer);
}
case DECIMAL:
{
ParquetReaderUtility.checkDecimalTypeEnabled(options);
Decimal18Writer writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).decimal18() : mapWriter.decimal18(name, type.getDecimalMetadata().getScale(), type.getDecimalMetadata().getPrecision());
return new DrillDecimal18Converter(writer, type.getDecimalMetadata().getPrecision(), type.getDecimalMetadata().getScale());
}
case TIMESTAMP_MILLIS:
{
TimeStampWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).timeStamp() : mapWriter.timeStamp(name);
return new DrillTimeStampConverter(writer);
}
default:
{
throw new UnsupportedOperationException("Unsupported type " + type.getOriginalType());
}
}
}
case INT96:
{
// TODO: replace null with TIMESTAMP_NANOS once parquet support such type annotation.
if (type.getOriginalType() == null) {
if (options.getOption(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP).bool_val) {
TimeStampWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).timeStamp() : mapWriter.timeStamp(name);
return new DrillFixedBinaryToTimeStampConverter(writer);
} else {
VarBinaryWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).varBinary() : mapWriter.varBinary(name);
return new DrillFixedBinaryToVarbinaryConverter(writer, ParquetColumnMetadata.getTypeLengthInBits(type.getPrimitiveTypeName()) / 8, mutator.getManagedBuffer());
}
}
}
case FLOAT:
{
Float4Writer writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).float4() : mapWriter.float4(name);
return new DrillFloat4Converter(writer);
}
case DOUBLE:
{
Float8Writer writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).float8() : mapWriter.float8(name);
return new DrillFloat8Converter(writer);
}
case BOOLEAN:
{
BitWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).bit() : mapWriter.bit(name);
return new DrillBoolConverter(writer);
}
case BINARY:
{
if (type.getOriginalType() == null) {
VarBinaryWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).varBinary() : mapWriter.varBinary(name);
return new DrillVarBinaryConverter(writer, mutator.getManagedBuffer());
}
switch(type.getOriginalType()) {
case UTF8:
{
VarCharWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).varChar() : mapWriter.varChar(name);
return new DrillVarCharConverter(writer, mutator.getManagedBuffer());
}
case ENUM:
{
VarCharWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).varChar() : mapWriter.varChar(name);
return new DrillVarCharConverter(writer, mutator.getManagedBuffer());
}
// TODO not sure if BINARY/DECIMAL is actually supported
case DECIMAL:
{
ParquetReaderUtility.checkDecimalTypeEnabled(options);
DecimalMetadata metadata = type.getDecimalMetadata();
if (metadata.getPrecision() <= 28) {
Decimal28SparseWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).decimal28Sparse() : mapWriter.decimal28Sparse(name, metadata.getScale(), metadata.getPrecision());
return new DrillBinaryToDecimal28Converter(writer, metadata.getPrecision(), metadata.getScale(), mutator.getManagedBuffer());
} else {
Decimal38SparseWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).decimal38Sparse() : mapWriter.decimal38Sparse(name, metadata.getScale(), metadata.getPrecision());
return new DrillBinaryToDecimal38Converter(writer, metadata.getPrecision(), metadata.getScale(), mutator.getManagedBuffer());
}
}
default:
{
throw new UnsupportedOperationException("Unsupported type " + type.getOriginalType());
}
}
}
case FIXED_LEN_BYTE_ARRAY:
if (type.getOriginalType() == OriginalType.DECIMAL) {
ParquetReaderUtility.checkDecimalTypeEnabled(options);
DecimalMetadata metadata = type.getDecimalMetadata();
if (metadata.getPrecision() <= 28) {
Decimal28SparseWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).decimal28Sparse() : mapWriter.decimal28Sparse(name, metadata.getScale(), metadata.getPrecision());
return new DrillBinaryToDecimal28Converter(writer, metadata.getPrecision(), metadata.getScale(), mutator.getManagedBuffer());
} else {
Decimal38SparseWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).decimal38Sparse() : mapWriter.decimal38Sparse(name, metadata.getScale(), metadata.getPrecision());
return new DrillBinaryToDecimal38Converter(writer, metadata.getPrecision(), metadata.getScale(), mutator.getManagedBuffer());
}
} else if (type.getOriginalType() == OriginalType.INTERVAL) {
IntervalWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).interval() : mapWriter.interval(name);
return new DrillFixedLengthByteArrayToInterval(writer);
} else {
VarBinaryWriter writer = type.getRepetition() == Repetition.REPEATED ? mapWriter.list(name).varBinary() : mapWriter.varBinary(name);
return new DrillFixedBinaryToVarbinaryConverter(writer, type.getTypeLength(), mutator.getManagedBuffer());
}
default:
throw new UnsupportedOperationException("Unsupported type: " + type.getPrimitiveTypeName());
}
}
use of org.apache.parquet.schema.DecimalMetadata in project drill by axbaretto.
the class ParquetRecordWriter method getPrimitiveType.
private PrimitiveType getPrimitiveType(MaterializedField field) {
MinorType minorType = field.getType().getMinorType();
String name = field.getName();
PrimitiveTypeName primitiveTypeName = ParquetTypeHelper.getPrimitiveTypeNameForMinorType(minorType);
Repetition repetition = ParquetTypeHelper.getRepetitionForDataMode(field.getDataMode());
OriginalType originalType = ParquetTypeHelper.getOriginalTypeForMinorType(minorType);
DecimalMetadata decimalMetadata = ParquetTypeHelper.getDecimalMetadataForField(field);
int length = ParquetTypeHelper.getLengthForMinorType(minorType);
return new PrimitiveType(repetition, primitiveTypeName, length, name, originalType, decimalMetadata, null);
}
use of org.apache.parquet.schema.DecimalMetadata in project drill by apache.
the class ParquetRecordWriter method getPrimitiveType.
protected PrimitiveType getPrimitiveType(MaterializedField field) {
MinorType minorType = field.getType().getMinorType();
String name = field.getName();
int length = ParquetTypeHelper.getLengthForMinorType(minorType);
PrimitiveTypeName primitiveTypeName = ParquetTypeHelper.getPrimitiveTypeNameForMinorType(minorType);
if (Types.isDecimalType(minorType)) {
primitiveTypeName = logicalTypeForDecimals;
if (usePrimitiveTypesForDecimals) {
if (field.getPrecision() <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT32)) {
primitiveTypeName = PrimitiveTypeName.INT32;
} else if (field.getPrecision() <= ParquetTypeHelper.getMaxPrecisionForPrimitiveType(PrimitiveTypeName.INT64)) {
primitiveTypeName = PrimitiveTypeName.INT64;
}
}
length = DecimalUtility.getMaxBytesSizeForPrecision(field.getPrecision());
}
Repetition repetition = ParquetTypeHelper.getRepetitionForDataMode(field.getDataMode());
OriginalType originalType = ParquetTypeHelper.getOriginalTypeForMinorType(minorType);
DecimalMetadata decimalMetadata = ParquetTypeHelper.getDecimalMetadataForField(field);
return new PrimitiveType(repetition, primitiveTypeName, length, name, originalType, decimalMetadata, null);
}
Aggregations