Search in sources :

Example 11 with RoaringBitmapSerdeFactory

use of org.apache.druid.segment.data.RoaringBitmapSerdeFactory in project druid by druid-io.

the class LikeFilterBenchmark method setup.

@Setup
public void setup() {
    step = (END_INT - START_INT) / cardinality;
    final BitmapFactory bitmapFactory = new RoaringBitmapFactory();
    final BitmapSerdeFactory serdeFactory = new RoaringBitmapSerdeFactory(null);
    final List<Integer> ints = generateInts();
    final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(FluentIterable.from(ints).transform(new Function<Integer, String>() {

        @Override
        public String apply(Integer i) {
            return i.toString();
        }
    }), GenericIndexed.STRING_STRATEGY);
    final BitmapIndex bitmapIndex = new StringBitmapIndexColumnPartSupplier(bitmapFactory, GenericIndexed.fromIterable(FluentIterable.from(ints).transform(new Function<Integer, ImmutableBitmap>() {

        @Override
        public ImmutableBitmap apply(Integer i) {
            final MutableBitmap mutableBitmap = bitmapFactory.makeEmptyMutableBitmap();
            mutableBitmap.add((i - START_INT) / step);
            return bitmapFactory.makeImmutableBitmap(mutableBitmap);
        }
    }), serdeFactory.getObjectStrategy()), dictionary).get();
    selector = new MockBitmapIndexSelector(dictionary, bitmapFactory, bitmapIndex);
}
Also used : StringBitmapIndexColumnPartSupplier(org.apache.druid.segment.serde.StringBitmapIndexColumnPartSupplier) MutableBitmap(org.apache.druid.collections.bitmap.MutableBitmap) BitmapIndex(org.apache.druid.segment.column.BitmapIndex) Function(com.google.common.base.Function) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) BitmapFactory(org.apache.druid.collections.bitmap.BitmapFactory) RoaringBitmapFactory(org.apache.druid.collections.bitmap.RoaringBitmapFactory) RoaringBitmapFactory(org.apache.druid.collections.bitmap.RoaringBitmapFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory) Setup(org.openjdk.jmh.annotations.Setup)

Example 12 with RoaringBitmapSerdeFactory

use of org.apache.druid.segment.data.RoaringBitmapSerdeFactory in project hive by apache.

the class TestDruidRecordWriter method testWrite.

// Test is failing due to Guava dependency, Druid 0.13.0 should have less dependency on Guava
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
    final String dataSourceName = "testDataSource";
    final File segmentOutputDir = temporaryFolder.newFolder();
    final File workingDir = temporaryFolder.newFolder();
    Configuration config = new Configuration();
    final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null), new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)));
    final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
    });
    DataSchema dataSchema = new DataSchema(dataSourceName, parserMap, new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_sum"), new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)), null, objectMapper);
    IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(null, null, null, null, temporaryFolder.newFolder(), null, null, null, null, indexSpec, null, null, 0, 0, null, null, 0L, null, null);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {

        @Override
        public File getStorageDirectory() {
            return segmentOutputDir;
        }
    });
    Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
    DruidRecordWriter druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
    List<DruidWritable> druidWritables = expectedRows.stream().map(input -> new DruidWritable(ImmutableMap.<String, Object>builder().putAll(input).put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, Granularities.DAY.bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN))).getMillis()).build())).collect(Collectors.toList());
    for (DruidWritable druidWritable : druidWritables) {
        druidRecordWriter.write(druidWritable);
    }
    druidRecordWriter.close(false);
    List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
    Assert.assertEquals(1, dataSegmentList.size());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
    final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
    Firehose firehose = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())), null, ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null);
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRows, rows);
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) DruidConstants(org.apache.hadoop.hive.druid.conf.DruidConstants) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) TypeReference(com.fasterxml.jackson.core.type.TypeReference) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) InputRow(org.apache.druid.data.input.InputRow) Firehose(org.apache.druid.data.input.Firehose) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) DruidTable(org.apache.calcite.adapter.druid.DruidTable) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) DruidStorageHandlerUtils(org.apache.hadoop.hive.druid.DruidStorageHandlerUtils) Constants(org.apache.hadoop.hive.conf.Constants) DruidStorageHandler(org.apache.hadoop.hive.druid.DruidStorageHandler) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Rule(org.junit.Rule) Ignore(org.junit.Ignore) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) TemporaryFolder(org.junit.rules.TemporaryFolder) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) IndexSpec(org.apache.druid.segment.IndexSpec) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) Configuration(org.apache.hadoop.conf.Configuration) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DataSegment(org.apache.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) Path(org.apache.hadoop.fs.Path) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) Firehose(org.apache.druid.data.input.Firehose) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DataSchema(org.apache.druid.segment.indexing.DataSchema) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) QueryableIndex(org.apache.druid.segment.QueryableIndex) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) File(java.io.File) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 13 with RoaringBitmapSerdeFactory

use of org.apache.druid.segment.data.RoaringBitmapSerdeFactory in project druid by druid-io.

the class BoundFilterBenchmark method setup.

@Setup
public void setup() {
    step = (END_INT - START_INT) / cardinality;
    final BitmapFactory bitmapFactory = new RoaringBitmapFactory();
    final BitmapSerdeFactory serdeFactory = new RoaringBitmapSerdeFactory(null);
    final List<Integer> ints = generateInts();
    final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(FluentIterable.from(ints).transform(i -> i.toString()), GenericIndexed.STRING_STRATEGY);
    final BitmapIndex bitmapIndex = new StringBitmapIndexColumnPartSupplier(bitmapFactory, GenericIndexed.fromIterable(FluentIterable.from(ints).transform(new Function<Integer, ImmutableBitmap>() {

        @Override
        public ImmutableBitmap apply(Integer i) {
            final MutableBitmap mutableBitmap = bitmapFactory.makeEmptyMutableBitmap();
            mutableBitmap.add((i - START_INT) / step);
            return bitmapFactory.makeImmutableBitmap(mutableBitmap);
        }
    }), serdeFactory.getObjectStrategy()), dictionary).get();
    selector = new MockBitmapIndexSelector(dictionary, bitmapFactory, bitmapIndex);
}
Also used : BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Measurement(org.openjdk.jmh.annotations.Measurement) Scope(org.openjdk.jmh.annotations.Scope) BitmapIndex(org.apache.druid.segment.column.BitmapIndex) ImmutableBitmap(org.apache.druid.collections.bitmap.ImmutableBitmap) Warmup(org.openjdk.jmh.annotations.Warmup) ArrayList(java.util.ArrayList) StringBitmapIndexColumnPartSupplier(org.apache.druid.segment.serde.StringBitmapIndexColumnPartSupplier) FluentIterable(com.google.common.collect.FluentIterable) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit) BitmapFactory(org.apache.druid.collections.bitmap.BitmapFactory) StringComparators(org.apache.druid.query.ordering.StringComparators) GenericIndexed(org.apache.druid.segment.data.GenericIndexed) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) MutableBitmap(org.apache.druid.collections.bitmap.MutableBitmap) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) Setup(org.openjdk.jmh.annotations.Setup) Function(com.google.common.base.Function) Mode(org.openjdk.jmh.annotations.Mode) Param(org.openjdk.jmh.annotations.Param) State(org.openjdk.jmh.annotations.State) Benchmark(org.openjdk.jmh.annotations.Benchmark) RoaringBitmapFactory(org.apache.druid.collections.bitmap.RoaringBitmapFactory) TimeUnit(java.util.concurrent.TimeUnit) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory) List(java.util.List) NullHandling(org.apache.druid.common.config.NullHandling) BoundFilter(org.apache.druid.segment.filter.BoundFilter) Fork(org.openjdk.jmh.annotations.Fork) Preconditions(com.google.common.base.Preconditions) ConciseSetUtils(org.apache.druid.extendedset.intset.ConciseSetUtils) BitmapIndexSelector(org.apache.druid.query.filter.BitmapIndexSelector) StringBitmapIndexColumnPartSupplier(org.apache.druid.segment.serde.StringBitmapIndexColumnPartSupplier) MutableBitmap(org.apache.druid.collections.bitmap.MutableBitmap) BitmapIndex(org.apache.druid.segment.column.BitmapIndex) Function(com.google.common.base.Function) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) BitmapFactory(org.apache.druid.collections.bitmap.BitmapFactory) RoaringBitmapFactory(org.apache.druid.collections.bitmap.RoaringBitmapFactory) RoaringBitmapFactory(org.apache.druid.collections.bitmap.RoaringBitmapFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory) Setup(org.openjdk.jmh.annotations.Setup)

Example 14 with RoaringBitmapSerdeFactory

use of org.apache.druid.segment.data.RoaringBitmapSerdeFactory in project druid by druid-io.

the class DimensionPredicateFilterBenchmark method setup.

@Setup
public void setup() {
    final BitmapFactory bitmapFactory = new RoaringBitmapFactory();
    final BitmapSerdeFactory serdeFactory = new RoaringBitmapSerdeFactory(null);
    final List<Integer> ints = generateInts();
    final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(FluentIterable.from(ints).transform(new Function<Integer, String>() {

        @Override
        public String apply(Integer i) {
            return i.toString();
        }
    }), GenericIndexed.STRING_STRATEGY);
    final BitmapIndex bitmapIndex = new StringBitmapIndexColumnPartSupplier(bitmapFactory, GenericIndexed.fromIterable(FluentIterable.from(ints).transform(new Function<Integer, ImmutableBitmap>() {

        @Override
        public ImmutableBitmap apply(Integer i) {
            final MutableBitmap mutableBitmap = bitmapFactory.makeEmptyMutableBitmap();
            mutableBitmap.add(i - START_INT);
            return bitmapFactory.makeImmutableBitmap(mutableBitmap);
        }
    }), serdeFactory.getObjectStrategy()), dictionary).get();
    selector = new MockBitmapIndexSelector(dictionary, bitmapFactory, bitmapIndex);
}
Also used : StringBitmapIndexColumnPartSupplier(org.apache.druid.segment.serde.StringBitmapIndexColumnPartSupplier) MutableBitmap(org.apache.druid.collections.bitmap.MutableBitmap) BitmapIndex(org.apache.druid.segment.column.BitmapIndex) Function(com.google.common.base.Function) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) BitmapFactory(org.apache.druid.collections.bitmap.BitmapFactory) RoaringBitmapFactory(org.apache.druid.collections.bitmap.RoaringBitmapFactory) RoaringBitmapFactory(org.apache.druid.collections.bitmap.RoaringBitmapFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory) Setup(org.openjdk.jmh.annotations.Setup)

Example 15 with RoaringBitmapSerdeFactory

use of org.apache.druid.segment.data.RoaringBitmapSerdeFactory in project druid by druid-io.

the class ParallelIndexTuningConfigTest method testSerdeWithMaxNumSubTasksAndMaxNumConcurrentSubTasks.

@Test
public void testSerdeWithMaxNumSubTasksAndMaxNumConcurrentSubTasks() {
    expectedException.expect(IllegalArgumentException.class);
    expectedException.expectMessage("Can't use both maxNumSubTasks and maxNumConcurrentSubTasks");
    final int maxNumSubTasks = 250;
    final ParallelIndexTuningConfig tuningConfig = new ParallelIndexTuningConfig(null, null, null, 10, 1000L, null, null, null, null, new DynamicPartitionsSpec(100, 100L), new IndexSpec(new RoaringBitmapSerdeFactory(true), CompressionStrategy.UNCOMPRESSED, CompressionStrategy.LZF, LongEncodingStrategy.LONGS), new IndexSpec(), 1, false, true, 10000L, OffHeapMemorySegmentWriteOutMediumFactory.instance(), maxNumSubTasks, maxNumSubTasks, 100, 20L, new Duration(3600), 128, null, null, false, null, null, null, null, null);
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) Duration(org.joda.time.Duration) Test(org.junit.Test)

Aggregations

RoaringBitmapSerdeFactory (org.apache.druid.segment.data.RoaringBitmapSerdeFactory)31 IndexSpec (org.apache.druid.segment.IndexSpec)25 Test (org.junit.Test)25 DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec)10 Duration (org.joda.time.Duration)9 IndexTuningConfig (org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig)8 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)5 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)5 BitmapSerdeFactory (org.apache.druid.segment.data.BitmapSerdeFactory)5 Function (com.google.common.base.Function)4 BitmapFactory (org.apache.druid.collections.bitmap.BitmapFactory)4 RoaringBitmapFactory (org.apache.druid.collections.bitmap.RoaringBitmapFactory)4 HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec)4 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)4 BitmapIndex (org.apache.druid.segment.column.BitmapIndex)4 ArrayList (java.util.ArrayList)3 List (java.util.List)3 MutableBitmap (org.apache.druid.collections.bitmap.MutableBitmap)3 InputRow (org.apache.druid.data.input.InputRow)3