Search in sources :

Example 6 with QueryableIndexStorageAdapter

use of org.apache.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class CompactionTaskRunTest method getCSVFormatRowsFromSegments.

private List<String> getCSVFormatRowsFromSegments(List<DataSegment> segments) throws Exception {
    final File cacheDir = temporaryFolder.newFolder();
    final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(cacheDir);
    List<Cursor> cursors = new ArrayList<>();
    for (DataSegment segment : segments) {
        final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
        final WindowedStorageAdapter adapter = new WindowedStorageAdapter(new QueryableIndexStorageAdapter(testUtils.getTestIndexIO().loadIndex(segmentFile)), segment.getInterval());
        final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
        cursors.addAll(cursorSequence.toList());
    }
    List<String> rowsFromSegment = new ArrayList<>();
    for (Cursor cursor : cursors) {
        cursor.reset();
        while (!cursor.isDone()) {
            final DimensionSelector selector1 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("ts", "ts"));
            final DimensionSelector selector2 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dim", "dim"));
            final DimensionSelector selector3 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("val", "val"));
            Object dimObject = selector2.getObject();
            String dimVal = null;
            if (dimObject instanceof String) {
                dimVal = (String) dimObject;
            } else if (dimObject instanceof List) {
                dimVal = String.join("|", (List<String>) dimObject);
            }
            rowsFromSegment.add(makeCSVFormatRow(selector1.getObject().toString(), dimVal, selector3.defaultGetObject().toString()));
            cursor.advance();
        }
    }
    return rowsFromSegment;
}
Also used : DimensionSelector(org.apache.druid.segment.DimensionSelector) ArrayList(java.util.ArrayList) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) Cursor(org.apache.druid.segment.Cursor) DataSegment(org.apache.druid.timeline.DataSegment) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) File(java.io.File) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter)

Example 7 with QueryableIndexStorageAdapter

use of org.apache.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class IndexTaskTest method testTransformSpec.

@Test
public void testTransformSpec() throws Exception {
    File tmpDir = temporaryFolder.newFolder();
    File tmpFile = File.createTempFile("druid", "index", tmpDir);
    try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
        writer.write("2014-01-01T00:00:10Z,a,an|array,1|2|3,1\n");
        writer.write("2014-01-01T01:00:20Z,b,another|array,3|4,1\n");
        writer.write("2014-01-01T02:00:30Z,c,and|another,0|1,1\n");
    }
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "dimt", "dimtarray1", "dimtarray2", "dimtnum_array")));
    final List<String> columns = Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "val");
    final String listDelimiter = "|";
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter("dim", "b", null), ImmutableList.of(new ExpressionTransform("dimt", "concat(dim,dim)", ExprMacroTable.nil()), new ExpressionTransform("dimtarray1", "array(dim, dim)", ExprMacroTable.nil()), new ExpressionTransform("dimtarray2", "map(d -> concat(d, 'foo'), dim_array)", ExprMacroTable.nil()), new ExpressionTransform("dimtnum_array", "map(d -> d + 3, dim_num_array)", ExprMacroTable.nil())));
    final IndexTuningConfig tuningConfig = createTuningConfigWithMaxRowsPerSegment(2, false);
    final IndexIngestionSpec indexIngestionSpec;
    if (useInputFormatApi) {
        indexIngestionSpec = createIngestionSpec(jsonMapper, tmpDir, DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, new CsvInputFormat(columns, listDelimiter, null, false, 0), transformSpec, null, tuningConfig, false, false);
    } else {
        indexIngestionSpec = createIngestionSpec(jsonMapper, tmpDir, new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, listDelimiter, columns, false, 0), transformSpec, null, tuningConfig, false, false);
    }
    IndexTask indexTask = new IndexTask(null, null, indexIngestionSpec, null);
    Assert.assertEquals(indexTask.getId(), indexTask.getGroupId());
    final List<DataSegment> segments = runTask(indexTask).rhs;
    Assert.assertEquals(1, segments.size());
    DataSegment segment = segments.get(0);
    final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
    final WindowedStorageAdapter adapter = new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)), segment.getInterval());
    final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
    final List<Map<String, Object>> transforms = cursorSequence.map(cursor -> {
        final DimensionSelector selector1 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimt", "dimt"));
        final DimensionSelector selector2 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray1", "dimtarray1"));
        final DimensionSelector selector3 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray2", "dimtarray2"));
        final DimensionSelector selector4 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtnum_array", "dimtnum_array"));
        Map<String, Object> row = new HashMap<>();
        row.put("dimt", selector1.defaultGetObject());
        row.put("dimtarray1", selector2.defaultGetObject());
        row.put("dimtarray2", selector3.defaultGetObject());
        row.put("dimtnum_array", selector4.defaultGetObject());
        cursor.advance();
        return row;
    }).toList();
    Assert.assertEquals(1, transforms.size());
    Assert.assertEquals("bb", transforms.get(0).get("dimt"));
    Assert.assertEquals(ImmutableList.of("b", "b"), transforms.get(0).get("dimtarray1"));
    Assert.assertEquals(ImmutableList.of("anotherfoo", "arrayfoo"), transforms.get(0).get("dimtarray2"));
    Assert.assertEquals(ImmutableList.of("6.0", "7.0"), transforms.get(0).get("dimtnum_array"));
    Assert.assertEquals(DATASOURCE, segments.get(0).getDataSource());
    Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
    Assert.assertEquals(NumberedShardSpec.class, segments.get(0).getShardSpec().getClass());
    Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
}
Also used : TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) Arrays(java.util.Arrays) IndexSpec(org.apache.druid.segment.IndexSpec) Pair(org.apache.druid.java.util.common.Pair) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) IAE(org.apache.druid.java.util.common.IAE) InputFormat(org.apache.druid.data.input.InputFormat) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) Set(java.util.Set) NoopSegmentHandoffNotifierFactory(org.apache.druid.segment.realtime.plumber.NoopSegmentHandoffNotifierFactory) EqualsVerifier(nl.jqno.equalsverifier.EqualsVerifier) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) StandardCharsets(java.nio.charset.StandardCharsets) TaskState(org.apache.druid.indexer.TaskState) CountDownLatch(java.util.concurrent.CountDownLatch) PartitionIds(org.apache.druid.timeline.partition.PartitionIds) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) RowIngestionMetersFactory(org.apache.druid.segment.incremental.RowIngestionMetersFactory) SegmentLocalCacheManager(org.apache.druid.segment.loading.SegmentLocalCacheManager) SegmentId(org.apache.druid.timeline.SegmentId) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) SegmentLoaderConfig(org.apache.druid.segment.loading.SegmentLoaderConfig) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) RunWith(org.junit.runner.RunWith) TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Interval(org.joda.time.Interval) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction) Before(org.junit.Before) BufferedWriter(java.io.BufferedWriter) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) Preconditions(com.google.common.base.Preconditions) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) CoreMatchers(org.hamcrest.CoreMatchers) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Event(org.apache.druid.java.util.emitter.core.Event) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Parameterized(org.junit.runners.Parameterized) ParseSpec(org.apache.druid.data.input.impl.ParseSpec) Sequence(org.apache.druid.java.util.common.guava.Sequence) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) LocalFirehoseFactory(org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) StringUtils(org.apache.druid.java.util.common.StringUtils) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) LockGranularity(org.apache.druid.indexing.common.LockGranularity) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) SegmentAllocateAction(org.apache.druid.indexing.common.actions.SegmentAllocateAction) Intervals(org.apache.druid.java.util.common.Intervals) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) Files(com.google.common.io.Files) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) DimensionSelector(org.apache.druid.segment.DimensionSelector) ExpectedException(org.junit.rules.ExpectedException) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) NoopServiceEmitter(org.apache.druid.server.metrics.NoopServiceEmitter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) StorageLocationConfig(org.apache.druid.segment.loading.StorageLocationConfig) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TimeUnit(java.util.concurrent.TimeUnit) Rule(org.junit.Rule) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) Cursor(org.apache.druid.segment.Cursor) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) Cursor(org.apache.druid.segment.Cursor) DataSegment(org.apache.druid.timeline.DataSegment) BufferedWriter(java.io.BufferedWriter) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) DimensionSelector(org.apache.druid.segment.DimensionSelector) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) File(java.io.File) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 8 with QueryableIndexStorageAdapter

use of org.apache.druid.segment.QueryableIndexStorageAdapter in project hive by apache.

the class TestDruidRecordWriter method testWrite.

// Test is failing due to Guava dependency, Druid 0.13.0 should have less dependency on Guava
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
    final String dataSourceName = "testDataSource";
    final File segmentOutputDir = temporaryFolder.newFolder();
    final File workingDir = temporaryFolder.newFolder();
    Configuration config = new Configuration();
    final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null), new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)));
    final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
    });
    DataSchema dataSchema = new DataSchema(dataSourceName, parserMap, new AggregatorFactory[] { new LongSumAggregatorFactory("visited_sum", "visited_sum"), new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)), null, objectMapper);
    IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(null, null, null, null, temporaryFolder.newFolder(), null, null, null, null, indexSpec, null, null, 0, 0, null, null, 0L, null, null);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {

        @Override
        public File getStorageDirectory() {
            return segmentOutputDir;
        }
    });
    Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
    DruidRecordWriter druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
    List<DruidWritable> druidWritables = expectedRows.stream().map(input -> new DruidWritable(ImmutableMap.<String, Object>builder().putAll(input).put(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, Granularities.DAY.bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN))).getMillis()).build())).collect(Collectors.toList());
    for (DruidWritable druidWritable : druidWritables) {
        druidRecordWriter.write(druidWritable);
    }
    druidRecordWriter.close(false);
    List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
    Assert.assertEquals(1, dataSegmentList.size());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
    final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
    Firehose firehose = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())), null, ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null);
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRows, rows);
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) DruidConstants(org.apache.hadoop.hive.druid.conf.DruidConstants) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) TypeReference(com.fasterxml.jackson.core.type.TypeReference) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) InputRow(org.apache.druid.data.input.InputRow) Firehose(org.apache.druid.data.input.Firehose) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) DruidTable(org.apache.calcite.adapter.druid.DruidTable) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) DruidStorageHandlerUtils(org.apache.hadoop.hive.druid.DruidStorageHandlerUtils) Constants(org.apache.hadoop.hive.conf.Constants) DruidStorageHandler(org.apache.hadoop.hive.druid.DruidStorageHandler) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Rule(org.junit.Rule) Ignore(org.junit.Ignore) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) TemporaryFolder(org.junit.rules.TemporaryFolder) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) IndexSpec(org.apache.druid.segment.IndexSpec) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) Configuration(org.apache.hadoop.conf.Configuration) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DataSegment(org.apache.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) Path(org.apache.hadoop.fs.Path) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) Firehose(org.apache.druid.data.input.Firehose) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DataSchema(org.apache.druid.segment.indexing.DataSchema) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) QueryableIndex(org.apache.druid.segment.QueryableIndex) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) File(java.io.File) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 9 with QueryableIndexStorageAdapter

use of org.apache.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class ExpressionAggregationBenchmark method compute.

private double compute(final Function<ColumnSelectorFactory, BufferAggregator> aggregatorFactory) {
    final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    final Sequence<Cursor> cursors = adapter.makeCursors(null, index.getDataInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
    final List<Double> results = cursors.map(cursor -> {
        final BufferAggregator bufferAggregator = aggregatorFactory.apply(cursor.getColumnSelectorFactory());
        bufferAggregator.init(aggregationBuffer, 0);
        while (!cursor.isDone()) {
            bufferAggregator.aggregate(aggregationBuffer, 0);
            cursor.advance();
        }
        final Double dbl = (Double) bufferAggregator.get(aggregationBuffer, 0);
        bufferAggregator.close();
        return dbl;
    }).toList();
    return Iterables.getOnlyElement(results);
}
Also used : Iterables(com.google.common.collect.Iterables) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Measurement(org.openjdk.jmh.annotations.Measurement) Intervals(org.apache.druid.java.util.common.Intervals) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) SegmentGenerator(org.apache.druid.segment.generator.SegmentGenerator) Blackhole(org.openjdk.jmh.infra.Blackhole) Scope(org.openjdk.jmh.annotations.Scope) Warmup(org.openjdk.jmh.annotations.Warmup) Function(java.util.function.Function) JavaScriptConfig(org.apache.druid.js.JavaScriptConfig) ByteBuffer(java.nio.ByteBuffer) BaseFloatColumnValueSelector(org.apache.druid.segment.BaseFloatColumnValueSelector) BufferAggregator(org.apache.druid.query.aggregation.BufferAggregator) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) ImmutableList(com.google.common.collect.ImmutableList) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit) TearDown(org.openjdk.jmh.annotations.TearDown) Sequence(org.apache.druid.java.util.common.guava.Sequence) Setup(org.openjdk.jmh.annotations.Setup) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) GeneratorColumnSchema(org.apache.druid.segment.generator.GeneratorColumnSchema) Mode(org.openjdk.jmh.annotations.Mode) Closer(org.apache.druid.java.util.common.io.Closer) Param(org.openjdk.jmh.annotations.Param) QueryableIndex(org.apache.druid.segment.QueryableIndex) JavaScriptAggregatorFactory(org.apache.druid.query.aggregation.JavaScriptAggregatorFactory) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) ValueType(org.apache.druid.segment.column.ValueType) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) State(org.openjdk.jmh.annotations.State) Benchmark(org.openjdk.jmh.annotations.Benchmark) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Cursor(org.apache.druid.segment.Cursor) NullHandling(org.apache.druid.common.config.NullHandling) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) Level(org.openjdk.jmh.annotations.Level) Fork(org.openjdk.jmh.annotations.Fork) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) Cursor(org.apache.druid.segment.Cursor) BufferAggregator(org.apache.druid.query.aggregation.BufferAggregator)

Example 10 with QueryableIndexStorageAdapter

use of org.apache.druid.segment.QueryableIndexStorageAdapter in project druid by druid-io.

the class ExpressionSelectorBenchmark method timeFloorUsingExtractionFn.

@Benchmark
public void timeFloorUsingExtractionFn(Blackhole blackhole) {
    final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(null, index.getDataInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
    final List<?> results = cursors.map(cursor -> {
        final DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector(new ExtractionDimensionSpec(ColumnHolder.TIME_COLUMN_NAME, "v", new TimeFormatExtractionFn(null, null, null, Granularities.HOUR, true)));
        consumeDimension(cursor, selector, blackhole);
        return null;
    }).toList();
    blackhole.consume(results);
}
Also used : BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Measurement(org.openjdk.jmh.annotations.Measurement) Intervals(org.apache.druid.java.util.common.Intervals) SegmentGenerator(org.apache.druid.segment.generator.SegmentGenerator) Blackhole(org.openjdk.jmh.infra.Blackhole) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) Scope(org.openjdk.jmh.annotations.Scope) Warmup(org.openjdk.jmh.annotations.Warmup) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit) DimensionSelector(org.apache.druid.segment.DimensionSelector) TearDown(org.openjdk.jmh.annotations.TearDown) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) ExpressionVirtualColumn(org.apache.druid.segment.virtual.ExpressionVirtualColumn) Sequence(org.apache.druid.java.util.common.guava.Sequence) Setup(org.openjdk.jmh.annotations.Setup) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) GeneratorColumnSchema(org.apache.druid.segment.generator.GeneratorColumnSchema) Mode(org.openjdk.jmh.annotations.Mode) Closer(org.apache.druid.java.util.common.io.Closer) TimeFormatExtractionFn(org.apache.druid.query.extraction.TimeFormatExtractionFn) Param(org.openjdk.jmh.annotations.Param) QueryableIndex(org.apache.druid.segment.QueryableIndex) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) ValueType(org.apache.druid.segment.column.ValueType) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) State(org.openjdk.jmh.annotations.State) StrlenExtractionFn(org.apache.druid.query.extraction.StrlenExtractionFn) Benchmark(org.openjdk.jmh.annotations.Benchmark) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Cursor(org.apache.druid.segment.Cursor) NullHandling(org.apache.druid.common.config.NullHandling) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) Level(org.openjdk.jmh.annotations.Level) ColumnType(org.apache.druid.segment.column.ColumnType) Fork(org.openjdk.jmh.annotations.Fork) BitSet(java.util.BitSet) TimeFormatExtractionFn(org.apache.druid.query.extraction.TimeFormatExtractionFn) DimensionSelector(org.apache.druid.segment.DimensionSelector) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) Cursor(org.apache.druid.segment.Cursor) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) Benchmark(org.openjdk.jmh.annotations.Benchmark)

Aggregations

QueryableIndexStorageAdapter (org.apache.druid.segment.QueryableIndexStorageAdapter)46 Cursor (org.apache.druid.segment.Cursor)35 Benchmark (org.openjdk.jmh.annotations.Benchmark)28 BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode)28 OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit)28 List (java.util.List)22 QueryableIndex (org.apache.druid.segment.QueryableIndex)21 DataSegment (org.apache.druid.timeline.DataSegment)21 ImmutableList (com.google.common.collect.ImmutableList)20 Granularities (org.apache.druid.java.util.common.granularity.Granularities)20 Sequence (org.apache.druid.java.util.common.guava.Sequence)20 VirtualColumns (org.apache.druid.segment.VirtualColumns)19 StorageAdapter (org.apache.druid.segment.StorageAdapter)18 TimeUnit (java.util.concurrent.TimeUnit)16 Intervals (org.apache.druid.java.util.common.Intervals)16 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)16 TestExprMacroTable (org.apache.druid.query.expression.TestExprMacroTable)16 GeneratorSchemaInfo (org.apache.druid.segment.generator.GeneratorSchemaInfo)16 SegmentGenerator (org.apache.druid.segment.generator.SegmentGenerator)16 LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec)16