Search in sources :

Example 1 with BloomFilterBuilder

Use of io.trino.orc.metadata.statistics.BloomFilterBuilder in the trino project by trinodb.

The createColumnWriter method of the ColumnWriters class.

/**
 * Creates the {@link ColumnWriter} for the column identified by {@code columnId}.
 *
 * <p>A {@link TimeType} is handled before anything else: its precision must be 6, the ORC type
 * kind must be {@code LONG}, and the ORC attribute {@code iceberg.long-type} must equal
 * {@code "TIME"}. Every other type is dispatched on the ORC type kind looked up in
 * {@code orcTypes}. {@code LIST}, {@code MAP} and {@code STRUCT} build their nested writers by
 * calling this method recursively with the same compression, buffer size, statistics limit and
 * bloom filter supplier.
 *
 * <p>Statistics builders are handed to the writers as suppliers, so
 * {@code bloomFilterBuilder.get()} is only invoked when a writer asks for a new statistics
 * builder, not while this method runs.
 *
 * @param columnId id of the column to write; also the key used to look up its ORC type
 * @param orcTypes ORC type metadata, indexed by column id
 * @param type engine type of the column; must not be null
 * @param compression compression kind passed through to every writer
 * @param bufferSize buffer size passed through to every writer
 * @param stringStatisticsLimit limit, converted to bytes, given to string statistics builders
 * @param bloomFilterBuilder supplier of bloom filter builders for the statistics builders
 * @return the writer matching the column's ORC type kind
 * @throws IllegalArgumentException if the ORC type kind has no writer here (e.g. {@code UNION})
 */
public static ColumnWriter createColumnWriter(OrcColumnId columnId, ColumnMetadata<OrcType> orcTypes, Type type, CompressionKind compression, int bufferSize, DataSize stringStatisticsLimit, Supplier<BloomFilterBuilder> bloomFilterBuilder) {
    requireNonNull(type, "type is null");
    OrcType orcType = orcTypes.get(columnId);

    // TIME columns are stored as LONG and recognised through an ORC attribute, so they are
    // resolved before the generic dispatch on the ORC type kind.
    if (type instanceof TimeType) {
        checkArgument(((TimeType) type).getPrecision() == 6, "%s not supported for ORC writer", type);
        checkArgument(orcType.getOrcTypeKind() == LONG, "wrong ORC type %s for type %s", orcType, type);
        checkArgument("TIME".equals(orcType.getAttributes().get("iceberg.long-type")), "wrong attributes %s for type %s", orcType.getAttributes(), type);
        return new TimeColumnWriter(columnId, type, compression, bufferSize, () -> new IntegerStatisticsBuilder(bloomFilterBuilder.get()));
    }

    switch (orcType.getOrcTypeKind()) {
        // Writers that take no statistics builder supplier.
        case BOOLEAN:
            return new BooleanColumnWriter(columnId, type, compression, bufferSize);
        case BYTE:
            return new ByteColumnWriter(columnId, type, compression, bufferSize);
        case DECIMAL:
            return new DecimalColumnWriter(columnId, type, compression, bufferSize);

        // Floating point values.
        case FLOAT:
            return new FloatColumnWriter(columnId, type, compression, bufferSize, () -> new DoubleStatisticsBuilder(bloomFilterBuilder.get()));
        case DOUBLE:
            return new DoubleColumnWriter(columnId, type, compression, bufferSize, () -> new DoubleStatisticsBuilder(bloomFilterBuilder.get()));

        // Values written through LongColumnWriter.
        case DATE:
            return new LongColumnWriter(columnId, type, compression, bufferSize, () -> new DateStatisticsBuilder(bloomFilterBuilder.get()));
        case SHORT:
        case INT:
        case LONG:
            return new LongColumnWriter(columnId, type, compression, bufferSize, () -> new IntegerStatisticsBuilder(bloomFilterBuilder.get()));

        case TIMESTAMP:
        case TIMESTAMP_INSTANT:
            return new TimestampColumnWriter(columnId, type, compression, bufferSize, () -> new TimestampStatisticsBuilder(bloomFilterBuilder.get()));

        // Variable-width values.
        case BINARY:
            return new SliceDirectColumnWriter(columnId, type, compression, bufferSize, BinaryStatisticsBuilder::new);
        case CHAR:
        case VARCHAR:
        case STRING:
            return new SliceDictionaryColumnWriter(columnId, type, compression, bufferSize, () -> new StringStatisticsBuilder(toIntExact(stringStatisticsLimit.toBytes()), bloomFilterBuilder.get()));

        // Nested types: build the child writers recursively.
        case LIST:
            return new ListColumnWriter(
                    columnId,
                    compression,
                    bufferSize,
                    createColumnWriter(orcType.getFieldTypeIndex(0), orcTypes, type.getTypeParameters().get(0), compression, bufferSize, stringStatisticsLimit, bloomFilterBuilder));
        case MAP: {
            OrcColumnId keyColumnId = orcType.getFieldTypeIndex(0);
            Type keyType = type.getTypeParameters().get(0);
            ColumnWriter keys = createColumnWriter(keyColumnId, orcTypes, keyType, compression, bufferSize, stringStatisticsLimit, bloomFilterBuilder);

            OrcColumnId valueColumnId = orcType.getFieldTypeIndex(1);
            Type valueType = type.getTypeParameters().get(1);
            ColumnWriter values = createColumnWriter(valueColumnId, orcTypes, valueType, compression, bufferSize, stringStatisticsLimit, bloomFilterBuilder);

            return new MapColumnWriter(columnId, compression, bufferSize, keys, values);
        }
        case STRUCT: {
            ImmutableList.Builder<ColumnWriter> children = ImmutableList.builder();
            for (int index = 0; index < orcType.getFieldCount(); index++) {
                children.add(createColumnWriter(orcType.getFieldTypeIndex(index), orcTypes, type.getTypeParameters().get(index), compression, bufferSize, stringStatisticsLimit, bloomFilterBuilder));
            }
            return new StructColumnWriter(columnId, compression, bufferSize, children.build());
        }

        case UNION:
            // No writer for UNION; handled by the throw below.
            break;
    }
    throw new IllegalArgumentException("Unsupported type: " + type);
}
Also used : StringStatisticsBuilder(io.trino.orc.metadata.statistics.StringStatisticsBuilder) OrcColumnId(io.trino.orc.metadata.OrcColumnId) TimestampStatisticsBuilder(io.trino.orc.metadata.statistics.TimestampStatisticsBuilder) IntegerStatisticsBuilder(io.trino.orc.metadata.statistics.IntegerStatisticsBuilder) DoubleStatisticsBuilder(io.trino.orc.metadata.statistics.DoubleStatisticsBuilder) StringStatisticsBuilder(io.trino.orc.metadata.statistics.StringStatisticsBuilder) BinaryStatisticsBuilder(io.trino.orc.metadata.statistics.BinaryStatisticsBuilder) TimestampStatisticsBuilder(io.trino.orc.metadata.statistics.TimestampStatisticsBuilder) BloomFilterBuilder(io.trino.orc.metadata.statistics.BloomFilterBuilder) DateStatisticsBuilder(io.trino.orc.metadata.statistics.DateStatisticsBuilder) IntegerStatisticsBuilder(io.trino.orc.metadata.statistics.IntegerStatisticsBuilder) TimeType(io.trino.spi.type.TimeType) DateStatisticsBuilder(io.trino.orc.metadata.statistics.DateStatisticsBuilder) BinaryStatisticsBuilder(io.trino.orc.metadata.statistics.BinaryStatisticsBuilder) DoubleStatisticsBuilder(io.trino.orc.metadata.statistics.DoubleStatisticsBuilder) OrcType(io.trino.orc.metadata.OrcType) TimeType(io.trino.spi.type.TimeType) Type(io.trino.spi.type.Type) OrcType(io.trino.orc.metadata.OrcType)

Aggregations

OrcColumnId (io.trino.orc.metadata.OrcColumnId)1 OrcType (io.trino.orc.metadata.OrcType)1 BinaryStatisticsBuilder (io.trino.orc.metadata.statistics.BinaryStatisticsBuilder)1 BloomFilterBuilder (io.trino.orc.metadata.statistics.BloomFilterBuilder)1 DateStatisticsBuilder (io.trino.orc.metadata.statistics.DateStatisticsBuilder)1 DoubleStatisticsBuilder (io.trino.orc.metadata.statistics.DoubleStatisticsBuilder)1 IntegerStatisticsBuilder (io.trino.orc.metadata.statistics.IntegerStatisticsBuilder)1 StringStatisticsBuilder (io.trino.orc.metadata.statistics.StringStatisticsBuilder)1 TimestampStatisticsBuilder (io.trino.orc.metadata.statistics.TimestampStatisticsBuilder)1 TimeType (io.trino.spi.type.TimeType)1 Type (io.trino.spi.type.Type)1