Search in sources :

Example 1 with ColumnarInts

Use of org.apache.druid.segment.data.ColumnarInts in the project druid by druid-io.

In the class DictionaryEncodedColumnPartSerde, method getDeserializer.

/**
 * Creates the deserializer for this column part.
 *
 * <p>The returned deserializer reads, in this order: a version byte, the feature flags, the value
 * dictionary, the single- or multi-valued encoded column, an optional bitmap index and, if any
 * bytes remain in the buffer, a spatial index. Everything it reads is registered on the supplied
 * {@link ColumnBuilder}.
 *
 * @return a new {@link Deserializer} instance
 */
@Override
public Deserializer getDeserializer() {
    return new Deserializer() {

        /**
         * Populates {@code builder} from the serialized column part held in {@code buffer}.
         *
         * @param buffer       source of the serialized bytes; its position is advanced by the reads
         * @param builder      receives the type, dictionary-encoded column supplier and indexes
         * @param columnConfig supplies the column cache size passed to the column supplier
         */
        @Override
        public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnConfig) {
            final VERSION version = VERSION.fromByte(buffer.get());
            final int flags = readFlags(version, buffer);
            final boolean multiValued = Feature.MULTI_VALUE.isSet(flags) || Feature.MULTI_VALUE_V3.isSet(flags);

            // The dictionary is decoded twice (as Strings and as raw ByteBuffers). The first pass works
            // on a duplicate, which has its own position, so the second pass starts at the same offset.
            final GenericIndexed<String> dictionary =
                GenericIndexed.read(buffer.duplicate(), GenericIndexed.STRING_STRATEGY, builder.getFileMapper());
            final GenericIndexed<ByteBuffer> dictionaryUtf8 =
                GenericIndexed.read(buffer, GenericIndexed.BYTE_BUFFER_STRATEGY, builder.getFileMapper());
            builder.setType(ValueType.STRING);

            // Exactly one of the two suppliers is read from the buffer; the other stays null.
            final WritableSupplier<ColumnarInts> singleValuedColumn =
                multiValued ? null : readSingleValuedColumn(version, buffer);
            final WritableSupplier<ColumnarMultiInts> multiValuedColumn =
                multiValued ? readMultiValuedColumn(version, buffer, flags) : null;

            // A null first dictionary entry is what marks the column as containing nulls.
            final boolean hasNulls = dictionary.get(0) == null;
            final DictionaryEncodedColumnSupplier columnSupplier = new DictionaryEncodedColumnSupplier(
                dictionary,
                dictionaryUtf8,
                singleValuedColumn,
                multiValuedColumn,
                columnConfig.columnCacheSizeBytes()
            );
            builder.setHasMultipleValues(multiValued)
                   .setHasNulls(hasNulls)
                   .setDictionaryEncodedColumnSupplier(columnSupplier);

            final boolean bitmapIndexPresent = !Feature.NO_BITMAP_INDEX.isSet(flags);
            if (bitmapIndexPresent) {
                final GenericIndexed<ImmutableBitmap> bitmaps =
                    GenericIndexed.read(buffer, bitmapSerdeFactory.getObjectStrategy(), builder.getFileMapper());
                builder.setBitmapIndex(
                    new StringBitmapIndexColumnPartSupplier(bitmapSerdeFactory.getBitmapFactory(), bitmaps, dictionary)
                );
            }

            // Any bytes left after the bitmap section are treated as a spatial index.
            if (buffer.hasRemaining()) {
                final ImmutableRTreeObjectStrategy rTreeStrategy =
                    new ImmutableRTreeObjectStrategy(bitmapSerdeFactory.getBitmapFactory());
                final ImmutableRTree spatialIndex = rTreeStrategy.fromByteBufferWithSize(buffer);
                builder.setSpatialIndex(new SpatialIndexColumnPartSupplier(spatialIndex));
            }
        }

        /**
         * Determines the feature flags: versions at or after {@code COMPRESSED} store them as an int in
         * the buffer, older versions derive them from the version itself.
         */
        private int readFlags(VERSION version, ByteBuffer buffer) {
            if (version.compareTo(VERSION.COMPRESSED) >= 0) {
                return buffer.getInt();
            }
            if (version.equals(VERSION.UNCOMPRESSED_MULTI_VALUE)) {
                return Feature.MULTI_VALUE.getMask();
            }
            return NO_FLAGS;
        }

        /**
         * Reads the single-valued encoded column for the given version.
         *
         * @throws IAE if the version has no single-valued representation handled here
         */
        private WritableSupplier<ColumnarInts> readSingleValuedColumn(VERSION version, ByteBuffer buffer) {
            switch (version) {
                case UNCOMPRESSED_SINGLE_VALUE:
                case UNCOMPRESSED_WITH_FLAGS:
                    return VSizeColumnarInts.readFromByteBuffer(buffer);
                case COMPRESSED:
                    return CompressedVSizeColumnarIntsSupplier.fromByteBuffer(buffer, byteOrder);
                default:
                    throw new IAE("Unsupported single-value version[%s]", version);
            }
        }

        /**
         * Reads the multi-valued encoded column, choosing the reader from the version and flags.
         *
         * @throws IAE if the version is not handled here, or the flags carry no multi-value feature
         *             that is valid for the version
         */
        private WritableSupplier<ColumnarMultiInts> readMultiValuedColumn(VERSION version, ByteBuffer buffer, int flags) {
            switch (version) {
                case UNCOMPRESSED_MULTI_VALUE:
                    return VSizeColumnarMultiInts.readFromByteBuffer(buffer);
                case UNCOMPRESSED_WITH_FLAGS:
                    if (!Feature.MULTI_VALUE.isSet(flags)) {
                        throw new IAE("Unrecognized multi-value flag[%d] for version[%s]", flags, version);
                    }
                    return VSizeColumnarMultiInts.readFromByteBuffer(buffer);
                case COMPRESSED:
                    if (Feature.MULTI_VALUE.isSet(flags)) {
                        return CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(buffer, byteOrder);
                    }
                    if (Feature.MULTI_VALUE_V3.isSet(flags)) {
                        return V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(buffer, byteOrder);
                    }
                    throw new IAE("Unrecognized multi-value flag[%d] for version[%s]", flags, version);
                default:
                    throw new IAE("Unsupported multi-value version[%s]", version);
            }
        }
    };
}
Also used : ColumnConfig(org.apache.druid.segment.column.ColumnConfig) ImmutableBitmap(org.apache.druid.collections.bitmap.ImmutableBitmap) ColumnarInts(org.apache.druid.segment.data.ColumnarInts) VSizeColumnarInts(org.apache.druid.segment.data.VSizeColumnarInts) IAE(org.apache.druid.java.util.common.IAE) ByteBuffer(java.nio.ByteBuffer) ColumnarMultiInts(org.apache.druid.segment.data.ColumnarMultiInts) VSizeColumnarMultiInts(org.apache.druid.segment.data.VSizeColumnarMultiInts) ImmutableRTree(org.apache.druid.collections.spatial.ImmutableRTree) ImmutableRTreeObjectStrategy(org.apache.druid.segment.data.ImmutableRTreeObjectStrategy) ColumnBuilder(org.apache.druid.segment.column.ColumnBuilder)

Example 2 with ColumnarInts

Use of org.apache.druid.segment.data.ColumnarInts in the project druid by druid-io.

In the class CompressedVSizeColumnarMultiIntsBenchmark, method setup.

/**
 * Prepares the benchmark state.
 *
 * <p>Generates 0x100000 rows of random length filled with random values below {@code 1 << bytes},
 * serializes them in both the compressed and the uncompressed multi-value form, reads each form
 * back into {@code compressed} / {@code uncompressed}, and finally marks
 * {@code filteredRowCount} distinct row indexes in {@code filter}.
 *
 * @throws IOException declared by the signature; presumably raised by {@code serialize} (confirm)
 */
@Setup
public void setup() throws IOException {
    final Random random = ThreadLocalRandom.current();
    final int bound = 1 << bytes;
    final List<int[]> rows = new ArrayList<>();
    while (rows.size() < 0x100000) {
        // Row length is drawn in two steps: an upper limit in [1, valuesPerRowBound], then a length below it.
        final int[] values = new int[random.nextInt(random.nextInt(valuesPerRowBound) + 1)];
        for (int k = 0; k < values.length; k++) {
            values[k] = random.nextInt(bound);
        }
        rows.add(values);
    }

    final ByteBuffer bufferCompressed = serialize(
        CompressedVSizeColumnarMultiIntsSupplier.fromIterable(
            Iterables.transform(rows, (Function<int[], ColumnarInts>) values -> VSizeColumnarInts.fromArray(values, 20)),
            bound - 1,
            ByteOrder.nativeOrder(),
            CompressionStrategy.LZ4,
            Closer.create()
        )
    );
    this.compressed = CompressedVSizeColumnarMultiIntsSupplier
        .fromByteBuffer(bufferCompressed, ByteOrder.nativeOrder())
        .get();

    final ByteBuffer bufferUncompressed = serialize(
        VSizeColumnarMultiInts.fromIterable(
            Iterables.transform(rows, values -> VSizeColumnarInts.fromArray(values, 20))
        )
    );
    this.uncompressed = VSizeColumnarMultiInts.readFromByteBuffer(bufferUncompressed);

    final int totalRows = rows.size();
    filter = new BitSet();
    int selected = 0;
    while (selected < filteredRowCount) {
        int candidate = random.nextInt(totalRows);
        // On a collision, probe forward (wrapping around) until an unselected row is found.
        while (filter.get(candidate)) {
            candidate = (candidate + 1) % totalRows;
        }
        filter.set(candidate);
        selected++;
    }
}
Also used : ColumnarInts(org.apache.druid.segment.data.ColumnarInts) Iterables(com.google.common.collect.Iterables) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) WritableSupplier(org.apache.druid.segment.data.WritableSupplier) Blackhole(org.openjdk.jmh.infra.Blackhole) CompressedVSizeColumnarMultiIntsSupplier(org.apache.druid.segment.data.CompressedVSizeColumnarMultiIntsSupplier) Scope(org.openjdk.jmh.annotations.Scope) Random(java.util.Random) IndexedInts(org.apache.druid.segment.data.IndexedInts) ByteBuffer(java.nio.ByteBuffer) ArrayList(java.util.ArrayList) CompressionStrategy(org.apache.druid.segment.data.CompressionStrategy) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) ColumnarMultiInts(org.apache.druid.segment.data.ColumnarMultiInts) Setup(org.openjdk.jmh.annotations.Setup) Function(com.google.common.base.Function) Mode(org.openjdk.jmh.annotations.Mode) Closer(org.apache.druid.java.util.common.io.Closer) Param(org.openjdk.jmh.annotations.Param) IOException(java.io.IOException) State(org.openjdk.jmh.annotations.State) Benchmark(org.openjdk.jmh.annotations.Benchmark) VSizeColumnarMultiInts(org.apache.druid.segment.data.VSizeColumnarMultiInts) TimeUnit(java.util.concurrent.TimeUnit) ByteOrder(java.nio.ByteOrder) List(java.util.List) NullHandling(org.apache.druid.common.config.NullHandling) WritableByteChannel(java.nio.channels.WritableByteChannel) VSizeColumnarInts(org.apache.druid.segment.data.VSizeColumnarInts) BitSet(java.util.BitSet) Random(java.util.Random) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) ArrayList(java.util.ArrayList) BitSet(java.util.BitSet) ColumnarInts(org.apache.druid.segment.data.ColumnarInts) VSizeColumnarInts(org.apache.druid.segment.data.VSizeColumnarInts) ByteBuffer(java.nio.ByteBuffer) Setup(org.openjdk.jmh.annotations.Setup)

Aggregations

ByteBuffer (java.nio.ByteBuffer)2 ColumnarInts (org.apache.druid.segment.data.ColumnarInts)2 ColumnarMultiInts (org.apache.druid.segment.data.ColumnarMultiInts)2 VSizeColumnarInts (org.apache.druid.segment.data.VSizeColumnarInts)2 VSizeColumnarMultiInts (org.apache.druid.segment.data.VSizeColumnarMultiInts)2 Function (com.google.common.base.Function)1 Iterables (com.google.common.collect.Iterables)1 IOException (java.io.IOException)1 ByteOrder (java.nio.ByteOrder)1 WritableByteChannel (java.nio.channels.WritableByteChannel)1 ArrayList (java.util.ArrayList)1 BitSet (java.util.BitSet)1 List (java.util.List)1 Random (java.util.Random)1 ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom)1 TimeUnit (java.util.concurrent.TimeUnit)1 ImmutableBitmap (org.apache.druid.collections.bitmap.ImmutableBitmap)1 ImmutableRTree (org.apache.druid.collections.spatial.ImmutableRTree)1 NullHandling (org.apache.druid.common.config.NullHandling)1 IAE (org.apache.druid.java.util.common.IAE)1