Use of org.apache.druid.segment.column.ColumnBuilder in project druid by druid-io.
The class DictionaryEncodedColumnPartSerde, method getDeserializer:
@Override
public Deserializer getDeserializer()
{
  return new Deserializer()
  {
    @Override
    public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnConfig)
    {
      final VERSION rVersion = VERSION.fromByte(buffer.get());
      final int rFlags;

      // Only COMPRESSED and later versions carry an explicit flags int in the header;
      // older versions imply their flags from the version byte itself.
      if (rVersion.compareTo(VERSION.COMPRESSED) >= 0) {
        rFlags = buffer.getInt();
      } else {
        rFlags = rVersion.equals(VERSION.UNCOMPRESSED_MULTI_VALUE)
                 ? Feature.MULTI_VALUE.getMask()
                 : NO_FLAGS;
      }

      final boolean hasMultipleValues = Feature.MULTI_VALUE.isSet(rFlags) || Feature.MULTI_VALUE_V3.isSet(rFlags);

      // Duplicate the first buffer since we are reading the dictionary twice.
      final GenericIndexed<String> rDictionary = GenericIndexed.read(
          buffer.duplicate(),
          GenericIndexed.STRING_STRATEGY,
          builder.getFileMapper()
      );
      final GenericIndexed<ByteBuffer> rDictionaryUtf8 = GenericIndexed.read(
          buffer,
          GenericIndexed.BYTE_BUFFER_STRATEGY,
          builder.getFileMapper()
      );
      builder.setType(ValueType.STRING);

      final WritableSupplier<ColumnarInts> rSingleValuedColumn;
      final WritableSupplier<ColumnarMultiInts> rMultiValuedColumn;

      if (hasMultipleValues) {
        rMultiValuedColumn = readMultiValuedColumn(rVersion, buffer, rFlags);
        rSingleValuedColumn = null;
      } else {
        rSingleValuedColumn = readSingleValuedColumn(rVersion, buffer);
        rMultiValuedColumn = null;
      }

      // The dictionary is sorted, so a null first entry means the column contains nulls.
      final String firstDictionaryEntry = rDictionary.get(0);

      DictionaryEncodedColumnSupplier dictionaryEncodedColumnSupplier = new DictionaryEncodedColumnSupplier(
          rDictionary,
          rDictionaryUtf8,
          rSingleValuedColumn,
          rMultiValuedColumn,
          columnConfig.columnCacheSizeBytes()
      );

      builder.setHasMultipleValues(hasMultipleValues)
             .setHasNulls(firstDictionaryEntry == null)
             .setDictionaryEncodedColumnSupplier(dictionaryEncodedColumnSupplier);

      if (!Feature.NO_BITMAP_INDEX.isSet(rFlags)) {
        GenericIndexed<ImmutableBitmap> rBitmaps = GenericIndexed.read(
            buffer,
            bitmapSerdeFactory.getObjectStrategy(),
            builder.getFileMapper()
        );
        builder.setBitmapIndex(
            new StringBitmapIndexColumnPartSupplier(bitmapSerdeFactory.getBitmapFactory(), rBitmaps, rDictionary)
        );
      }

      // Any trailing bytes hold an optional spatial (R-tree) index.
      if (buffer.hasRemaining()) {
        ImmutableRTree rSpatialIndex =
            new ImmutableRTreeObjectStrategy(bitmapSerdeFactory.getBitmapFactory()).fromByteBufferWithSize(buffer);
        builder.setSpatialIndex(new SpatialIndexColumnPartSupplier(rSpatialIndex));
      }
    }

    private WritableSupplier<ColumnarInts> readSingleValuedColumn(VERSION version, ByteBuffer buffer)
    {
      switch (version) {
        case UNCOMPRESSED_SINGLE_VALUE:
        case UNCOMPRESSED_WITH_FLAGS:
          return VSizeColumnarInts.readFromByteBuffer(buffer);
        case COMPRESSED:
          return CompressedVSizeColumnarIntsSupplier.fromByteBuffer(buffer, byteOrder);
        default:
          throw new IAE("Unsupported single-value version[%s]", version);
      }
    }

    private WritableSupplier<ColumnarMultiInts> readMultiValuedColumn(VERSION version, ByteBuffer buffer, int flags)
    {
      switch (version) {
        case UNCOMPRESSED_MULTI_VALUE:
          return VSizeColumnarMultiInts.readFromByteBuffer(buffer);
        case UNCOMPRESSED_WITH_FLAGS:
          if (Feature.MULTI_VALUE.isSet(flags)) {
            return VSizeColumnarMultiInts.readFromByteBuffer(buffer);
          } else {
            throw new IAE("Unrecognized multi-value flag[%d] for version[%s]", flags, version);
          }
        case COMPRESSED:
          if (Feature.MULTI_VALUE.isSet(flags)) {
            return CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(buffer, byteOrder);
          } else if (Feature.MULTI_VALUE_V3.isSet(flags)) {
            return V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(buffer, byteOrder);
          } else {
            throw new IAE("Unrecognized multi-value flag[%d] for version[%s]", flags, version);
          }
        default:
          throw new IAE("Unsupported multi-value version[%s]", version);
      }
    }
  };
}
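The version byte and flags int drive every branch above. The Feature values are used as an ordinal bitmask; the sketch below is a minimal, self-contained reconstruction of that pattern, not a copy of Druid's enum (the real constants live in DictionaryEncodedColumnPartSerde.Feature, and the mask values shown are an assumption based on declaration order):

// Minimal sketch of an ordinal-based feature bitmask, mirroring how
// Feature.getMask()/isSet() are used in the deserializer above.
// Mask values follow declaration order and are assumed, not confirmed.
enum Feature
{
  MULTI_VALUE,      // assumed mask 0x1
  MULTI_VALUE_V3,   // assumed mask 0x2
  NO_BITMAP_INDEX;  // assumed mask 0x4

  int getMask()
  {
    return 1 << ordinal();
  }

  boolean isSet(int flags)
  {
    return (flags & getMask()) != 0;
  }
}

Under those assumed masks, a COMPRESSED header whose flags int is 0x2 selects the V3 multi-value path, while NO_FLAGS (0) selects the single-valued path and still reads a bitmap index.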
Use of org.apache.druid.segment.column.ColumnBuilder in project druid by druid-io.
The class FloatNumericColumnPartSerde, method getDeserializer:
@Override
public Deserializer getDeserializer()
{
  return new Deserializer()
  {
    @Override
    public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnConfig)
    {
      final CompressedColumnarFloatsSupplier column = CompressedColumnarFloatsSupplier.fromByteBuffer(buffer, byteOrder);
      // An empty bitmap serves as the null-value bitmap, i.e. no rows are null.
      FloatNumericColumnSupplier columnSupplier = new FloatNumericColumnSupplier(
          column,
          IndexIO.LEGACY_FACTORY.getBitmapFactory().makeEmptyImmutableBitmap()
      );
      builder.setType(ValueType.FLOAT)
             .setHasMultipleValues(false)
             .setNumericColumnSupplier(columnSupplier);
    }
  };
}
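To show where this deserializer fits, here is a hedged usage sketch modeled on the testSanity pattern further below. The names mapper, buffer, columnConfig, and serde are assumptions standing in for a mapped segment and a FloatNumericColumnPartSerde instance; they are not part of the snippet above:

// Hypothetical wiring, not taken from the Druid sources: 'mapper' is assumed to be
// a SmooshedFileMapper over the segment and 'buffer' to hold this column part's bytes.
ColumnBuilder builder = new ColumnBuilder()
    .setFileMapper(mapper);
serde.getDeserializer().read(buffer, builder, columnConfig);
ColumnHolder holder = builder.build();
BaseColumn column = holder.getColumn(); // backed by the FloatNumericColumnSupplier set above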
Use of org.apache.druid.segment.column.ColumnBuilder in project druid by druid-io.
The class LargeColumnSupportedComplexColumnSerializerTest, method testSanity:
@Test
public void testSanity() throws IOException
{
  // In the full test class, 'fn' is a murmur3_128 HashFunction field; it is declared
  // locally here so the snippet stands on its own.
  HashFunction fn = Hashing.murmur3_128();
  HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(fn);
  int[] cases = {1000, 5000, 10000, 20000};
  int[] columnSizes = {
      Integer.MAX_VALUE,
      Integer.MAX_VALUE / 2,
      Integer.MAX_VALUE / 4,
      5000 * Long.BYTES,
      2500 * Long.BYTES
  };

  for (int columnSize : columnSizes) {
    for (int aCase : cases) {
      File tmpFile = temporaryFolder.newFolder();
      HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
      try (
          SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium();
          FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)
      ) {
        LargeColumnSupportedComplexColumnSerializer serializer =
            LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(
                segmentWriteOutMedium,
                "test",
                serde.getObjectStrategy(),
                columnSize
            );
        serializer.open();

        for (int i = 0; i < aCase; i++) {
          final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
          byte[] hashBytes = fn.hashLong(i).asBytes();
          collector.add(hashBytes);
          baseCollector.fold(collector);
          serializer.serialize(new ObjectColumnSelector()
          {
            @Nullable
            @Override
            public Object getObject()
            {
              return collector;
            }

            @Override
            public Class classOfObject()
            {
              return HyperLogLogCollector.class;
            }

            @Override
            public void inspectRuntimeShape(RuntimeShapeInspector inspector)
            {
              // doesn't matter in tests
            }
          });
        }

        try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
          serializer.writeTo(channel, v9Smoosher);
        }
      }

      // Map the smooshed files back in and deserialize the column through ColumnBuilder.
      SmooshedFileMapper mapper = Smoosh.map(tmpFile);
      final ColumnBuilder builder = new ColumnBuilder()
          .setType(ValueType.COMPLEX)
          .setHasMultipleValues(false)
          .setFileMapper(mapper);
      serde.deserializeColumn(mapper.mapFile("test"), builder, null);

      ColumnHolder columnHolder = builder.build();
      ComplexColumn complexColumn = (ComplexColumn) columnHolder.getColumn();
      HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
      for (int i = 0; i < aCase; i++) {
        collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
      }
      // Folding every row back together must reproduce the original cardinality estimate exactly.
      Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
    }
  }
}
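Taken together, the three snippets follow one ColumnBuilder lifecycle: declare type metadata, attach the appropriate supplier, then build an immutable ColumnHolder. A condensed sketch, where mapper and columnSupplier are placeholders for whatever the serde at hand provides:

// Condensed ColumnBuilder lifecycle distilled from the snippets above.
// 'mapper' and 'columnSupplier' are hypothetical placeholders.
ColumnBuilder builder = new ColumnBuilder()
    .setType(ValueType.FLOAT)      // or STRING / COMPLEX, per the serde
    .setHasMultipleValues(false)
    .setFileMapper(mapper);        // needed when column parts reference smooshed files
builder.setNumericColumnSupplier(columnSupplier);
ColumnHolder holder = builder.build();

Each serde sets the type before attaching its supplier; build() then yields the holder that callers, like the test above, cast back to a concrete column type.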