Search in sources:

Example 1 with ColumnBuilder

use of org.apache.druid.segment.column.ColumnBuilder in project druid by druid-io.

the class DictionaryEncodedColumnPartSerde method getDeserializer.

/**
 * Builds the deserializer for a dictionary-encoded string column part.
 *
 * The returned deserializer consumes the buffer strictly in this order: a version byte,
 * an optional int of feature flags, the value dictionary, the single- or multi-valued
 * column, an optional bitmap index, and finally (if any bytes are left) a spatial index.
 */
@Override
public Deserializer getDeserializer() {
    return new Deserializer() {

        /**
         * Reads the column part from {@code buffer} and registers what was read on {@code builder}.
         *
         * @param buffer       source bytes, positioned at the version byte
         * @param builder      receives the type, dictionary-encoded supplier and any indexes
         * @param columnConfig supplies the column cache size passed to the column supplier
         */
        @Override
        public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnConfig) {
            final VERSION version = VERSION.fromByte(buffer.get());

            // Only versions at or after COMPRESSED store an explicit flags int; for older
            // versions the flags are derived from the version itself.
            final int flags;
            if (version.compareTo(VERSION.COMPRESSED) < 0) {
                if (version.equals(VERSION.UNCOMPRESSED_MULTI_VALUE)) {
                    flags = Feature.MULTI_VALUE.getMask();
                } else {
                    flags = NO_FLAGS;
                }
            } else {
                flags = buffer.getInt();
            }
            final boolean multiValued = Feature.MULTI_VALUE.isSet(flags) || Feature.MULTI_VALUE_V3.isSet(flags);

            // The dictionary is read twice (as Strings and as raw ByteBuffers). The first read
            // works on a duplicate so that the second read starts from the same position.
            final GenericIndexed<String> dictionary =
                GenericIndexed.read(buffer.duplicate(), GenericIndexed.STRING_STRATEGY, builder.getFileMapper());
            final GenericIndexed<ByteBuffer> dictionaryUtf8 =
                GenericIndexed.read(buffer, GenericIndexed.BYTE_BUFFER_STRATEGY, builder.getFileMapper());
            builder.setType(ValueType.STRING);

            // Exactly one of the two column suppliers is populated; the other stays null.
            final WritableSupplier<ColumnarMultiInts> multiValuedColumn =
                multiValued ? readMultiValuedColumn(version, buffer, flags) : null;
            final WritableSupplier<ColumnarInts> singleValuedColumn =
                multiValued ? null : readSingleValuedColumn(version, buffer);

            // A null first dictionary entry is what marks the column as having nulls.
            final String firstEntry = dictionary.get(0);
            final boolean hasNulls = firstEntry == null;
            final DictionaryEncodedColumnSupplier columnSupplier = new DictionaryEncodedColumnSupplier(
                dictionary,
                dictionaryUtf8,
                singleValuedColumn,
                multiValuedColumn,
                columnConfig.columnCacheSizeBytes());
            builder.setHasMultipleValues(multiValued)
                .setHasNulls(hasNulls)
                .setDictionaryEncodedColumnSupplier(columnSupplier);

            if (!Feature.NO_BITMAP_INDEX.isSet(flags)) {
                final GenericIndexed<ImmutableBitmap> bitmaps =
                    GenericIndexed.read(buffer, bitmapSerdeFactory.getObjectStrategy(), builder.getFileMapper());
                builder.setBitmapIndex(
                    new StringBitmapIndexColumnPartSupplier(bitmapSerdeFactory.getBitmapFactory(), bitmaps, dictionary));
            }

            // Whatever is left in the buffer after the bitmaps is treated as a spatial index.
            if (buffer.hasRemaining()) {
                final ImmutableRTreeObjectStrategy rTreeStrategy =
                    new ImmutableRTreeObjectStrategy(bitmapSerdeFactory.getBitmapFactory());
                final ImmutableRTree spatialIndex = rTreeStrategy.fromByteBufferWithSize(buffer);
                builder.setSpatialIndex(new SpatialIndexColumnPartSupplier(spatialIndex));
            }
        }

        /**
         * Reads a single-valued column in the layout implied by {@code version}.
         *
         * @throws IAE if {@code version} is not a supported single-value version
         */
        private WritableSupplier<ColumnarInts> readSingleValuedColumn(VERSION version, ByteBuffer buffer) {
            switch (version) {
                case UNCOMPRESSED_SINGLE_VALUE:
                case UNCOMPRESSED_WITH_FLAGS:
                    return VSizeColumnarInts.readFromByteBuffer(buffer);
                case COMPRESSED:
                    return CompressedVSizeColumnarIntsSupplier.fromByteBuffer(buffer, byteOrder);
                default:
                    throw new IAE("Unsupported single-value version[%s]", version);
            }
        }

        /**
         * Reads a multi-valued column in the layout implied by {@code version} and {@code flags}.
         *
         * @throws IAE if the version is unsupported, or the flags do not name a multi-value
         *             layout that is valid for that version
         */
        private WritableSupplier<ColumnarMultiInts> readMultiValuedColumn(VERSION version, ByteBuffer buffer, int flags) {
            switch (version) {
                case UNCOMPRESSED_MULTI_VALUE:
                    return VSizeColumnarMultiInts.readFromByteBuffer(buffer);
                case UNCOMPRESSED_WITH_FLAGS:
                    if (!Feature.MULTI_VALUE.isSet(flags)) {
                        throw new IAE("Unrecognized multi-value flag[%d] for version[%s]", flags, version);
                    }
                    return VSizeColumnarMultiInts.readFromByteBuffer(buffer);
                case COMPRESSED:
                    if (Feature.MULTI_VALUE.isSet(flags)) {
                        return CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(buffer, byteOrder);
                    }
                    if (Feature.MULTI_VALUE_V3.isSet(flags)) {
                        return V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(buffer, byteOrder);
                    }
                    throw new IAE("Unrecognized multi-value flag[%d] for version[%s]", flags, version);
                default:
                    throw new IAE("Unsupported multi-value version[%s]", version);
            }
        }
    };
}
Also used : ColumnConfig(org.apache.druid.segment.column.ColumnConfig) ImmutableBitmap(org.apache.druid.collections.bitmap.ImmutableBitmap) ColumnarInts(org.apache.druid.segment.data.ColumnarInts) VSizeColumnarInts(org.apache.druid.segment.data.VSizeColumnarInts) IAE(org.apache.druid.java.util.common.IAE) ByteBuffer(java.nio.ByteBuffer) ColumnarMultiInts(org.apache.druid.segment.data.ColumnarMultiInts) VSizeColumnarMultiInts(org.apache.druid.segment.data.VSizeColumnarMultiInts) ImmutableRTree(org.apache.druid.collections.spatial.ImmutableRTree) ImmutableRTreeObjectStrategy(org.apache.druid.segment.data.ImmutableRTreeObjectStrategy) ColumnBuilder(org.apache.druid.segment.column.ColumnBuilder)

Example 2 with ColumnBuilder

use of org.apache.druid.segment.column.ColumnBuilder in project druid by druid-io.

the class FloatNumericColumnPartSerde method getDeserializer.

/**
 * Builds the deserializer for a float column part.
 *
 * The returned deserializer reads a compressed float column from the buffer using this
 * serde's byte order and registers it on the builder as a single-valued FLOAT column,
 * paired with an empty immutable bitmap from the legacy bitmap factory.
 */
@Override
public Deserializer getDeserializer() {
    return new Deserializer() {

        /**
         * @param buffer       source bytes for the compressed float column
         * @param builder      receives the value type and the numeric column supplier
         * @param columnConfig not used by this deserializer
         */
        @Override
        public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnConfig) {
            final CompressedColumnarFloatsSupplier floats =
                CompressedColumnarFloatsSupplier.fromByteBuffer(buffer, byteOrder);
            final FloatNumericColumnSupplier floatSupplier = new FloatNumericColumnSupplier(
                floats,
                IndexIO.LEGACY_FACTORY.getBitmapFactory().makeEmptyImmutableBitmap());
            builder.setType(ValueType.FLOAT)
                .setHasMultipleValues(false)
                .setNumericColumnSupplier(floatSupplier);
        }
    };
}
Also used : CompressedColumnarFloatsSupplier(org.apache.druid.segment.data.CompressedColumnarFloatsSupplier) ColumnConfig(org.apache.druid.segment.column.ColumnConfig) ColumnBuilder(org.apache.druid.segment.column.ColumnBuilder) ByteBuffer(java.nio.ByteBuffer)

Example 3 with ColumnBuilder

use of org.apache.druid.segment.column.ColumnBuilder in project druid by druid-io.

the class LargeColumnSupportedComplexColumnSerializerTest method testSanity.

/**
 * Round-trips HyperLogLog collectors through {@code LargeColumnSupportedComplexColumnSerializer}.
 *
 * For every combination of column size limit and row count, the test serializes one
 * collector per row into a smoosh directory, reads the column back through the serde,
 * folds every row value into a fresh collector, and checks that its cardinality estimate
 * equals that of a collector folded directly from the inputs.
 *
 * @throws IOException if creating the temp folder or writing/mapping the smoosh files fails
 */
@Test
public void testSanity() throws IOException {
    HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
    int[] cases = { 1000, 5000, 10000, 20000 };
    int[] columnSizes = { Integer.MAX_VALUE, Integer.MAX_VALUE / 2, Integer.MAX_VALUE / 4, 5000 * Long.BYTES, 2500 * Long.BYTES };
    for (int columnSize : columnSizes) {
        for (int aCase : cases) {
            File tmpFile = temporaryFolder.newFolder();
            // Reference collector: folded directly from the inputs, never serialized.
            HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
            try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium();
                FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
                LargeColumnSupportedComplexColumnSerializer serializer = LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(segmentWriteOutMedium, "test", serde.getObjectStrategy(), columnSize);
                serializer.open();
                for (int i = 0; i < aCase; i++) {
                    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
                    // fn is declared outside this method (not visible in this snippet).
                    byte[] hashBytes = fn.hashLong(i).asBytes();
                    collector.add(hashBytes);
                    baseCollector.fold(collector);
                    serializer.serialize(new ObjectColumnSelector() {

                        @Nullable
                        @Override
                        public Object getObject() {
                            return collector;
                        }

                        @Override
                        public Class classOfObject() {
                            return HyperLogLogCollector.class;
                        }

                        @Override
                        public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
                        // doesn't matter in tests
                        }
                    });
                }
                try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
                    serializer.writeTo(channel, v9Smoosher);
                }
            }
            // The mapper holds the mapped smoosh files, so close it once the read-back and
            // assertion for this iteration are done instead of leaking it on every pass.
            try (SmooshedFileMapper mapper = Smoosh.map(tmpFile)) {
                final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.COMPLEX).setHasMultipleValues(false).setFileMapper(mapper);
                serde.deserializeColumn(mapper.mapFile("test"), builder, null);
                ColumnHolder columnHolder = builder.build();
                ComplexColumn complexColumn = (ComplexColumn) columnHolder.getColumn();
                HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
                for (int i = 0; i < aCase; i++) {
                    collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
                }
                Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
            }
        }
    }
}
Also used : SmooshedWriter(org.apache.druid.java.util.common.io.smoosh.SmooshedWriter) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) OffHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) SegmentWriteOutMedium(org.apache.druid.segment.writeout.SegmentWriteOutMedium) FileSmoosher(org.apache.druid.java.util.common.io.smoosh.FileSmoosher) ColumnBuilder(org.apache.druid.segment.column.ColumnBuilder) File(java.io.File) Nullable(javax.annotation.Nullable) SmooshedFileMapper(org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper) ComplexColumn(org.apache.druid.segment.column.ComplexColumn) ObjectColumnSelector(org.apache.druid.segment.ObjectColumnSelector) Test(org.junit.Test)

Aggregations

ColumnBuilder (org.apache.druid.segment.column.ColumnBuilder)3 ByteBuffer (java.nio.ByteBuffer)2 ColumnConfig (org.apache.druid.segment.column.ColumnConfig)2 File (java.io.File)1 Nullable (javax.annotation.Nullable)1 ImmutableBitmap (org.apache.druid.collections.bitmap.ImmutableBitmap)1 ImmutableRTree (org.apache.druid.collections.spatial.ImmutableRTree)1 HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector)1 IAE (org.apache.druid.java.util.common.IAE)1 FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher)1 SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper)1 SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter)1 RuntimeShapeInspector (org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector)1 ObjectColumnSelector (org.apache.druid.segment.ObjectColumnSelector)1 ColumnHolder (org.apache.druid.segment.column.ColumnHolder)1 ComplexColumn (org.apache.druid.segment.column.ComplexColumn)1 ColumnarInts (org.apache.druid.segment.data.ColumnarInts)1 ColumnarMultiInts (org.apache.druid.segment.data.ColumnarMultiInts)1 CompressedColumnarFloatsSupplier (org.apache.druid.segment.data.CompressedColumnarFloatsSupplier)1 ImmutableRTreeObjectStrategy (org.apache.druid.segment.data.ImmutableRTreeObjectStrategy)1