
Example 16 with HyperUniquesAggregatorFactory

use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project hive by apache.

the class TestDruidRecordWriter method testWrite.

// Test is failing due to Guava dependency, Druid 0.13.0 should have less dependency on Guava
@Ignore
@Test
public void testWrite() throws IOException, SegmentLoadingException {
    final String dataSourceName = "testDataSource";
    final File segmentOutputDir = temporaryFolder.newFolder();
    final File workingDir = temporaryFolder.newFolder();
    Configuration config = new Configuration();
    final InputRowParser inputRowParser = new MapInputRowParser(
        new TimeAndDimsParseSpec(
            new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
            new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("host")), null, null)
        )
    );
    final Map<String, Object> parserMap = objectMapper.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
    });
    DataSchema dataSchema = new DataSchema(
        dataSourceName,
        parserMap,
        new AggregatorFactory[] {
            new LongSumAggregatorFactory("visited_sum", "visited_sum"),
            new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
        },
        new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
        null,
        objectMapper
    );
    IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(null, null, null, null, temporaryFolder.newFolder(), null, null, null, null, indexSpec, null, null, 0, 0, null, null, 0L, null, null);
    LocalFileSystem localFileSystem = FileSystem.getLocal(config);
    DataSegmentPusher dataSegmentPusher = new LocalDataSegmentPusher(new LocalDataSegmentPusherConfig() {

        @Override
        public File getStorageDirectory() {
            return segmentOutputDir;
        }
    });
    Path segmentDescriptorPath = new Path(workingDir.getAbsolutePath(), DruidStorageHandler.SEGMENTS_DESCRIPTOR_DIR_NAME);
    DruidRecordWriter druidRecordWriter = new DruidRecordWriter(dataSchema, tuningConfig, dataSegmentPusher, 20, segmentDescriptorPath, localFileSystem);
    List<DruidWritable> druidWritables = expectedRows.stream()
        .map(input -> new DruidWritable(
            ImmutableMap.<String, Object>builder()
                .putAll(input)
                .put(
                    Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME,
                    Granularities.DAY
                        .bucketStart(new DateTime((long) input.get(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)))
                        .getMillis()
                )
                .build()
        ))
        .collect(Collectors.toList());
    for (DruidWritable druidWritable : druidWritables) {
        druidRecordWriter.write(druidWritable);
    }
    druidRecordWriter.close(false);
    List<DataSegment> dataSegmentList = DruidStorageHandlerUtils.getCreatedSegments(segmentDescriptorPath, config);
    Assert.assertEquals(1, dataSegmentList.size());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegmentList.get(0), tmpUnzippedSegmentDir);
    final QueryableIndex queryableIndex = DruidStorageHandlerUtils.INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(queryableIndex);
    Firehose firehose = new IngestSegmentFirehose(ImmutableList.of(new WindowedStorageAdapter(adapter, adapter.getInterval())), null, ImmutableList.of("host"), ImmutableList.of("visited_sum", "unique_hosts"), null);
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRows, rows);
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) DruidConstants(org.apache.hadoop.hive.druid.conf.DruidConstants) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DruidRecordWriter(org.apache.hadoop.hive.druid.io.DruidRecordWriter) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) TypeReference(com.fasterxml.jackson.core.type.TypeReference) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) InputRow(org.apache.druid.data.input.InputRow) Firehose(org.apache.druid.data.input.Firehose) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) DruidTable(org.apache.calcite.adapter.druid.DruidTable) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) DruidStorageHandlerUtils(org.apache.hadoop.hive.druid.DruidStorageHandlerUtils) Constants(org.apache.hadoop.hive.conf.Constants) DruidStorageHandler(org.apache.hadoop.hive.druid.DruidStorageHandler) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) DruidWritable(org.apache.hadoop.hive.druid.serde.DruidWritable) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) File(java.io.File) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Rule(org.junit.Rule) Ignore(org.junit.Ignore) LocalDataSegmentPusher(org.apache.druid.segment.loading.LocalDataSegmentPusher) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) TemporaryFolder(org.junit.rules.TemporaryFolder)
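
The essential part of the test is the metrics spec passed to DataSchema: each aggregator takes a metric name and the input field it reads from. Below is a minimal, self-contained sketch of just that pairing (the class name and printout are illustrative, not part of the Hive test; the column names are copied from the test above):

import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;

public class MetricsSpecSketch {
    public static void main(String[] args) {
        // First argument: metric name stored in the segment.
        // Second argument: input field the aggregator reads from.
        AggregatorFactory[] metrics = new AggregatorFactory[] {
            new LongSumAggregatorFactory("visited_sum", "visited_sum"),
            new HyperUniquesAggregatorFactory("unique_hosts", "unique_hosts")
        };
        for (AggregatorFactory metric : metrics) {
            System.out.println(metric.getName() + " -> " + metric.getClass().getSimpleName());
        }
    }
}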

Example 17 with HyperUniquesAggregatorFactory

use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.

the class TimeseriesBenchmark method setupQueries.

private void setupQueries() {
    // queries for the basic schema
    Map<String, TimeseriesQuery> basicQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    {
        // basic.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
        queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
        queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
        queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
        TimeseriesQuery queryA = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(descending).build();
        basicQueries.put("A", queryA);
    }
    {
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        LongSumAggregatorFactory lsaf = new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");
        BoundDimFilter timeFilter = new BoundDimFilter(ColumnHolder.TIME_COLUMN_NAME, "200000", "300000", false, false, null, null, StringComparators.NUMERIC);
        queryAggs.add(new FilteredAggregatorFactory(lsaf, timeFilter));
        TimeseriesQuery timeFilterQuery = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(descending).build();
        basicQueries.put("timeFilterNumeric", timeFilterQuery);
    }
    {
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        LongSumAggregatorFactory lsaf = new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");
        BoundDimFilter timeFilter = new BoundDimFilter(ColumnHolder.TIME_COLUMN_NAME, "200000", "300000", false, false, null, null, StringComparators.ALPHANUMERIC);
        queryAggs.add(new FilteredAggregatorFactory(lsaf, timeFilter));
        TimeseriesQuery timeFilterQuery = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(descending).build();
        basicQueries.put("timeFilterAlphanumeric", timeFilterQuery);
    }
    {
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.utc(200000, 300000)));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        LongSumAggregatorFactory lsaf = new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");
        queryAggs.add(lsaf);
        TimeseriesQuery timeFilterQuery = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(descending).build();
        basicQueries.put("timeFilterByInterval", timeFilterQuery);
    }
    SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) LinkedHashMap(java.util.LinkedHashMap) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory)
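
In this benchmark the hyperUnique aggregator reads a pre-aggregated HLL column named "hyper" rather than hashing raw values at query time. The sketch below is not part of the benchmark: the class name and the output metric "uniqueDimSequential" are made up for illustration, and it assumes the CardinalityAggregatorFactory convenience constructor that takes a list of DimensionSpecs. It contrasts the two ways of counting uniques:

import com.google.common.collect.ImmutableList;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory;
import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;

public class UniqueCountAggregators {
    public static void main(String[] args) {
        // Reads an HLL column ("hyper") that was pre-built at ingestion time.
        AggregatorFactory preAggregated = new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper");

        // Hashes raw dimension values into an HLL sketch at query time instead.
        AggregatorFactory atQueryTime = new CardinalityAggregatorFactory(
            "uniqueDimSequential",
            ImmutableList.of(new DefaultDimensionSpec("dimSequential", "dimSequential")),
            // byRow = false: count distinct values rather than distinct value combinations
            false
        );

        System.out.println(preAggregated.getName() + " / " + atQueryTime.getName());
    }
}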

Example 18 with HyperUniquesAggregatorFactory

use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.

the class TopNTypeInterfaceBenchmark method setupQueries.

private void setupQueries() {
    // queries for the basic schema
    Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    {
        // basic.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
        queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
        queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
        queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
        // Use an IdentityExtractionFn to force usage of HeapBasedTopNAlgorithm
        TopNQueryBuilder queryBuilderString = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension(new ExtractionDimensionSpec("dimSequential", "dimSequential", IdentityExtractionFn.getInstance())).metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
        // HeapBasedTopNAlgorithm is always used for numeric columns
        TopNQueryBuilder queryBuilderLong = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("metLongUniform").metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
        TopNQueryBuilder queryBuilderFloat = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("metFloatNormal").metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
        basicQueries.put("string", queryBuilderString);
        basicQueries.put("long", queryBuilderLong);
        basicQueries.put("float", queryBuilderFloat);
    }
    {
        // basic.numericSort
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
        basicQueries.put("numericSort", queryBuilderA);
    }
    {
        // basic.alphanumericSort
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
        basicQueries.put("alphanumericSort", queryBuilderA);
    }
    SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) LinkedHashMap(java.util.LinkedHashMap) DimensionTopNMetricSpec(org.apache.druid.query.topn.DimensionTopNMetricSpec) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec)
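
Note that setupQueries only stores TopNQueryBuilder instances; a runnable TopNQuery still needs a threshold before build() is called, which presumably happens elsewhere in the benchmark setup. A hedged, self-contained sketch of that final step (the data source, interval, aggregator, and threshold below are arbitrary placeholders, not values from the benchmark):

import java.util.Collections;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.topn.TopNQuery;
import org.apache.druid.query.topn.TopNQueryBuilder;

public class TopNBuildSketch {
    public static void main(String[] args) {
        TopNQueryBuilder builder = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("dimSequential")
            .metric("rows")
            .intervals(new MultipleIntervalSegmentSpec(
                Collections.singletonList(Intervals.of("2000/2001"))))
            .aggregators(Collections.<AggregatorFactory>singletonList(new CountAggregatorFactory("rows")));

        // The benchmark keeps builders in a map; turning one into a query
        // means fixing a threshold and calling build().
        TopNQuery query = builder.threshold(10).build();
        System.out.println(query);
    }
}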

Example 19 with HyperUniquesAggregatorFactory

use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.

the class TopNBenchmark method setupQueries.

private void setupQueries() {
    // queries for the basic schema
    Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    {
        // basic.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
        queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
        queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
        queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimSequential").metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
        basicQueries.put("A", queryBuilderA);
    }
    {
        // basic.numericSort
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
        basicQueries.put("numericSort", queryBuilderA);
    }
    {
        // basic.alphanumericSort
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
        basicQueries.put("alphanumericSort", queryBuilderA);
    }
    SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) LinkedHashMap(java.util.LinkedHashMap) DimensionTopNMetricSpec(org.apache.druid.query.topn.DimensionTopNMetricSpec) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory)
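
The only difference between the numericSort and alphanumericSort variants is the StringComparator handed to DimensionTopNMetricSpec. A small standalone sketch (not from the benchmark) of how those comparators order values differently:

import org.apache.druid.query.ordering.StringComparators;

public class DimensionOrderingSketch {
    public static void main(String[] args) {
        // NUMERIC parses the strings as numbers, so "9" sorts before "10" (negative result).
        System.out.println(StringComparators.NUMERIC.compare("9", "10"));
        // LEXICOGRAPHIC compares code points, so "10" sorts before "9" (positive result).
        System.out.println(StringComparators.LEXICOGRAPHIC.compare("9", "10"));
        // ALPHANUMERIC compares digit runs numerically inside mixed strings,
        // so "row9" sorts before "row10" (negative result).
        System.out.println(StringComparators.ALPHANUMERIC.compare("row9", "row10"));
    }
}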

Example 20 with HyperUniquesAggregatorFactory

use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.

the class DruidSegmentReaderTest method setUp.

@Before
public void setUp() throws IOException {
    // Write a segment with two rows in it, with columns: s (string), d (double), cnt (long), met_s (complex).
    final IncrementalIndex incrementalIndex = IndexBuilder.create()
        .schema(
            new IncrementalIndexSchema.Builder()
                .withDimensionsSpec(
                    new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("s"), new DoubleDimensionSchema("d")))
                )
                .withMetrics(
                    new CountAggregatorFactory("cnt"),
                    new HyperUniquesAggregatorFactory("met_s", "s")
                )
                .withRollup(false)
                .build()
        )
        .rows(
            ImmutableList.of(
                new MapBasedInputRow(
                    DateTimes.of("2000"),
                    ImmutableList.of("s", "d"),
                    ImmutableMap.<String, Object>builder().put("s", "foo").put("d", 1.23).build()
                ),
                new MapBasedInputRow(
                    DateTimes.of("2000T01"),
                    ImmutableList.of("s", "d"),
                    ImmutableMap.<String, Object>builder().put("s", "bar").put("d", 4.56).build()
                )
            )
        )
        .buildIncrementalIndex();
    segmentDirectory = temporaryFolder.newFolder();
    try {
        TestHelper.getTestIndexMergerV9(OnHeapMemorySegmentWriteOutMediumFactory.instance()).persist(incrementalIndex, segmentDirectory, new IndexSpec(), null);
    } finally {
        incrementalIndex.close();
    }
}
Also used : DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) IndexSpec(org.apache.druid.segment.IndexSpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) Before(org.junit.Before)
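
The "met_s" metric produced here is a complex column holding a HyperLogLog collector per row. The sketch below is a rough illustration of what the aggregator accumulates, using the collector API from the druid-hll module; the exact class and method names (HyperLogLogCollector.makeLatestCollector, HyperLogLogHash.getDefault().hash) are assumptions based on that module and are not part of the test above:

import org.apache.druid.hll.HyperLogLogCollector;
import org.apache.druid.hll.HyperLogLogHash;

public class HllCollectorSketch {
    public static void main(String[] args) {
        HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
        // Values are hashed before being offered to the collector.
        for (String value : new String[] {"foo", "bar", "foo"}) {
            collector.add(HyperLogLogHash.getDefault().hash(value));
        }
        // Should print an estimate close to 2 for the two distinct values above.
        System.out.println(collector.estimateCardinality());
    }
}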

Aggregations

HyperUniquesAggregatorFactory (org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory): 25
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 16
Test (org.junit.Test): 12
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 10
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 9
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 7
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 7
File (java.io.File): 5
ArrayList (java.util.ArrayList): 5
List (java.util.List): 5
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 5
CardinalityAggregatorFactory (org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory): 5
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 5
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 5
Before (org.junit.Before): 5
ImmutableMap (com.google.common.collect.ImmutableMap): 4
Map (java.util.Map): 4
InputRow (org.apache.druid.data.input.InputRow): 4
DoubleDimensionSchema (org.apache.druid.data.input.impl.DoubleDimensionSchema): 4
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 4