use of io.druid.collections.bitmap.ImmutableBitmap in project druid by druid-io.
the class Filters method estimateSelectivity.
/**
 * Return an estimated selectivity for bitmaps given by an iterator.
 *
 * <p>The estimate is the sum of the sizes of all bitmaps divided by {@code totalNumRows}, capped at 1. Each bitmap
 * is counted independently, so a row present in several bitmaps contributes once per bitmap; the cap keeps the
 * result from exceeding 1 in that case.
 *
 * <p>When no rows are matched (including when the iterator is empty) the result is 0 regardless of
 * {@code totalNumRows}. In particular an empty column ({@code totalNumRows == 0}) yields 0 instead of NaN.
 *
 * @param bitmaps      iterator of bitmaps; it is fully consumed by this call
 * @param totalNumRows number of rows in the column associated with this bitmap index
 *
 * @return estimated selectivity
 */
public static double estimateSelectivity(final Iterator<ImmutableBitmap> bitmaps, final long totalNumRows) {
  long numMatchedRows = 0;
  while (bitmaps.hasNext()) {
    numMatchedRows += bitmaps.next().size();
  }
  if (numMatchedRows == 0) {
    // Guard against 0 / 0: the quotient would be NaN, and Math.min(1., NaN) is NaN as well.
    return 0.;
  }
  return Math.min(1., (double) numMatchedRows / totalNumRows);
}
use of io.druid.collections.bitmap.ImmutableBitmap in project druid by druid-io.
the class DictionaryEncodedColumnPartSerde method getDeserializer.
/**
 * Builds the deserializer for this dictionary-encoded column part.
 *
 * <p>The returned deserializer consumes the buffer strictly in this order: version byte, optional flags int,
 * value dictionary, encoded column (single- or multi-valued), bitmap index, and finally a spatial index if any
 * bytes remain.
 */
@Override
public Deserializer getDeserializer() {
  return new Deserializer() {
    /**
     * Reads one serialized column part from {@code buffer} and registers its pieces on {@code builder}.
     *
     * @param buffer       source of the serialized column part; its position advances as sections are read
     * @param builder      receives the value type, the column supplier and the indexes
     * @param columnConfig supplies the column cache size passed to the dictionary-encoded column supplier
     */
    @Override
    public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnConfig) {
      final VERSION version = VERSION.fromByte(buffer.get());

      // Versions before COMPRESSED carry no flags int in the buffer; the flags are derived from the version.
      final int flags;
      if (version.compareTo(VERSION.COMPRESSED) < 0) {
        flags = version == VERSION.UNCOMPRESSED_MULTI_VALUE ? Feature.MULTI_VALUE.getMask() : NO_FLAGS;
      } else {
        flags = buffer.getInt();
      }
      final boolean multiValued = Feature.MULTI_VALUE.isSet(flags) || Feature.MULTI_VALUE_V3.isSet(flags);

      final GenericIndexed<String> dictionary = GenericIndexed.read(
          buffer,
          GenericIndexed.STRING_STRATEGY,
          builder.getFileMapper()
      );
      builder.setType(ValueType.STRING);

      // Exactly one of the two suppliers is read from the buffer; the other stays null.
      final WritableSupplier<IndexedMultivalue<IndexedInts>> multiValuedColumn = multiValued
          ? readMultiValuedColumn(version, buffer, flags, builder.getFileMapper())
          : null;
      final WritableSupplier<IndexedInts> singleValuedColumn = multiValued
          ? null
          : readSingleValuedColumn(version, buffer, builder.getFileMapper());

      builder
          .setHasMultipleValues(multiValued)
          .setDictionaryEncodedColumn(
              new DictionaryEncodedColumnSupplier(
                  dictionary,
                  singleValuedColumn,
                  multiValuedColumn,
                  columnConfig.columnCacheSizeBytes()
              )
          );

      final GenericIndexed<ImmutableBitmap> bitmaps = GenericIndexed.read(
          buffer,
          bitmapSerdeFactory.getObjectStrategy(),
          builder.getFileMapper()
      );
      builder.setBitmapIndex(
          new BitmapIndexColumnPartSupplier(bitmapSerdeFactory.getBitmapFactory(), bitmaps, dictionary)
      );

      // Any bytes left after the bitmap index are read as a spatial index.
      if (buffer.hasRemaining()) {
        final ImmutableRTree spatialIndex = ByteBufferSerializer.read(
            buffer,
            new IndexedRTree.ImmutableRTreeObjectStrategy(bitmapSerdeFactory.getBitmapFactory())
        );
        builder.setSpatialIndex(new SpatialIndexColumnPartSupplier(spatialIndex));
      }
    }

    /**
     * Reads the single-valued encoded column in the layout that matches {@code version}.
     *
     * @throws IAE if {@code version} is not a supported single-value version
     */
    private WritableSupplier<IndexedInts> readSingleValuedColumn(
        VERSION version,
        ByteBuffer buffer,
        SmooshedFileMapper fileMapper
    ) {
      switch (version) {
        case UNCOMPRESSED_SINGLE_VALUE:
          return VSizeIndexedInts.readFromByteBuffer(buffer).asWritableSupplier();
        case COMPRESSED:
          return CompressedVSizeIntsIndexedSupplier.fromByteBuffer(buffer, byteOrder, fileMapper);
        default:
          throw new IAE("Unsupported single-value version[%s]", version);
      }
    }

    /**
     * Reads the multi-valued encoded column in the layout that matches {@code version} and, for the
     * COMPRESSED version, the multi-value feature set in {@code flags}.
     *
     * @throws IAE if {@code version} is not a supported multi-value version, or if the version is COMPRESSED
     *             and neither multi-value feature flag is set
     */
    private WritableSupplier<IndexedMultivalue<IndexedInts>> readMultiValuedColumn(
        VERSION version,
        ByteBuffer buffer,
        int flags,
        SmooshedFileMapper fileMapper
    ) {
      switch (version) {
        case UNCOMPRESSED_MULTI_VALUE:
          return VSizeIndexed.readFromByteBuffer(buffer).asWritableSupplier();
        case COMPRESSED:
          // MULTI_VALUE is checked first, so it wins if both flags happen to be set.
          if (Feature.MULTI_VALUE.isSet(flags)) {
            return CompressedVSizeIndexedSupplier.fromByteBuffer(buffer, byteOrder, fileMapper);
          }
          if (Feature.MULTI_VALUE_V3.isSet(flags)) {
            return CompressedVSizeIndexedV3Supplier.fromByteBuffer(buffer, byteOrder, fileMapper);
          }
          throw new IAE("Unrecognized multi-value flag[%d]", flags);
        default:
          throw new IAE("Unsupported multi-value version[%s]", version);
      }
    }
  };
}
use of io.druid.collections.bitmap.ImmutableBitmap in project druid by druid-io.
the class BitmapIterationBenchmark method constructAndIter.
/**
 * Measures the combined cost of building an immutable bitmap from raw values and then iterating over it.
 * The first {@code state.dataSize} entries of {@code state.data} are added to a fresh mutable bitmap, which is
 * converted to an immutable bitmap and handed to {@code iter}.
 *
 * <p>This is a pattern from realtime nodes, see
 * {@link io.druid.segment.StringDimensionIndexer#fillBitmapsFromUnsortedEncodedKeyComponent}. The benchmark is
 * still approximate and is to be improved to better reflect actual workloads of realtime nodes.
 *
 * @param state benchmark state supplying the values to add and how many of them to use
 *
 * @return the value returned by {@code iter} for the constructed bitmap
 */
@Benchmark
public int constructAndIter(ConstructAndIterState state) {
  final int count = state.dataSize;
  final int[] values = state.data;
  final MutableBitmap accumulator = factory.makeEmptyMutableBitmap();
  int idx = 0;
  // Only the first `count` entries are used; the array itself may be longer.
  while (idx < count) {
    accumulator.add(values[idx]);
    idx++;
  }
  return iter(factory.makeImmutableBitmap(accumulator));
}
use of io.druid.collections.bitmap.ImmutableBitmap in project druid by druid-io.
the class BoundFilterBenchmark method matchHalfLexicographic.
/**
 * Benchmarks resolving the {@code HALF_LEXICOGRAPHIC} filter to a bitmap, reported as average time in
 * microseconds. The result is sanity-checked: the bitmap must be non-empty and smaller than {@code cardinality}.
 */
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void matchHalfLexicographic() {
  final ImmutableBitmap matched = HALF_LEXICOGRAPHIC.getBitmapIndex(selector);
  // Short-circuits exactly like the inline form: the second size() call happens only if the first check passes.
  final boolean matchedSomeButNotAll = matched.size() > 0 && matched.size() < cardinality;
  Preconditions.checkState(matchedSomeButNotAll);
}
use of io.druid.collections.bitmap.ImmutableBitmap in project druid by druid-io.
the class ExtractionDimFilterTest method testNull.
/**
 * Verifies that a selector filter on dimension "FDHJSFFHDS" for value "extractDimVal", using
 * {@code DIM_EXTRACTION_FN}, resolves to an empty bitmap.
 * NOTE(review): the dimension name looks deliberately nonexistent in the test selector; confirm against the fixture.
 */
@Test
public void testNull() {
  final Filter filter = new SelectorDimFilter("FDHJSFFHDS", "extractDimVal", DIM_EXTRACTION_FN).toFilter();
  final ImmutableBitmap matched = filter.getBitmapIndex(BITMAP_INDEX_SELECTOR);
  final int matchedRows = matched.size();
  Assert.assertEquals(0, matchedRows);
}
Aggregations